diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index a93e6fcc0ad9..fdd3a0c9cb37 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -34,8 +34,8 @@ xe-y += xe_bb.o \ xe_dma_buf.o \ xe_drm_client.o \ xe_exec.o \ - xe_execlist.o \ xe_exec_queue.o \ + xe_execlist.o \ xe_force_wake.o \ xe_ggtt.o \ xe_gpu_scheduler.o \ @@ -56,6 +56,7 @@ xe-y += xe_bb.o \ xe_gt_topology.o \ xe_guc.o \ xe_guc_ads.o \ + xe_guc_buf.o \ xe_guc_capture.o \ xe_guc_ct.o \ xe_guc_db_mgr.o \ @@ -66,11 +67,11 @@ xe-y += xe_bb.o \ xe_guc_pc.o \ xe_guc_submit.o \ xe_heci_gsc.o \ + xe_huc.o \ xe_hw_engine.o \ xe_hw_engine_class_sysfs.o \ xe_hw_engine_group.o \ xe_hw_fence.o \ - xe_huc.o \ xe_irq.o \ xe_lrc.o \ xe_migrate.o \ @@ -90,11 +91,12 @@ xe-y += xe_bb.o \ xe_range_fence.o \ xe_reg_sr.o \ xe_reg_whitelist.o \ - xe_rtp.o \ xe_ring_ops.o \ + xe_rtp.o \ xe_sa.o \ xe_sched_job.o \ xe_step.o \ + xe_survivability_mode.o \ xe_sync.o \ xe_tile.o \ xe_tile_sysfs.o \ @@ -102,8 +104,8 @@ xe-y += xe_bb.o \ xe_trace_bo.o \ xe_trace_guc.o \ xe_trace_lrc.o \ - xe_ttm_sys_mgr.o \ xe_ttm_stolen_mgr.o \ + xe_ttm_sys_mgr.o \ xe_ttm_vram_mgr.o \ xe_tuning.o \ xe_uc.o \ @@ -111,8 +113,9 @@ xe-y += xe_bb.o \ xe_vm.o \ xe_vram.o \ xe_vram_freq.o \ - xe_wait_user_fence.o \ + xe_vsec.o \ xe_wa.o \ + xe_wait_user_fence.o \ xe_wopcm.o xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o @@ -120,6 +123,8 @@ xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o # graphics hardware monitoring (HWMON) support xe-$(CONFIG_HWMON) += xe_hwmon.o +xe-$(CONFIG_PERF_EVENTS) += xe_pmu.o + # graphics virtualization (SR-IOV) support xe-y += \ xe_gt_sriov_vf.o \ @@ -219,6 +224,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_display_wa.o \ i915-display/intel_dkl_phy.o \ i915-display/intel_dmc.o \ + i915-display/intel_dmc_wl.o \ i915-display/intel_dp.o \ i915-display/intel_dp_aux.o \ i915-display/intel_dp_aux_backlight.o \ @@ -266,7 +272,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_vdsc.o \ i915-display/intel_vga.o \ i915-display/intel_vrr.o \ - i915-display/intel_dmc_wl.o \ i915-display/intel_wm.o \ i915-display/skl_scaler.o \ i915-display/skl_universal_plane.o \ diff --git a/drivers/gpu/drm/xe/abi/guc_capture_abi.h b/drivers/gpu/drm/xe/abi/guc_capture_abi.h index e7898edc6236..dd4117553739 100644 --- a/drivers/gpu/drm/xe/abi/guc_capture_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_capture_abi.h @@ -25,7 +25,7 @@ enum guc_state_capture_type { #define GUC_STATE_CAPTURE_TYPE_MAX (GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE + 1) -/* Class indecies for capture_class and capture_instance arrays */ +/* Class indices for capture_class and capture_instance arrays */ enum guc_capture_list_class_type { GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE = 0, GUC_CAPTURE_LIST_CLASS_VIDEO = 1, diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 7dcb118e3d9f..d633f1c739e4 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -132,7 +132,7 @@ enum { * _`GUC_KLV_VGT_POLICY_SCHED_IF_IDLE` : 0x8001 * This config sets whether strict scheduling is enabled whereby any VF * that doesn’t have work to submit is still allocated a fixed execution - * time-slice to ensure active VFs execution is always consitent even + * time-slice to ensure active VFs execution is always consistent even * during other VF reprovisiong / rebooting events. Changing this KLV * impacts all VFs and takes effect on the next VF-Switch event. 
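As an illustrative aside (not part of the patch): a GuC KLV is a 32-bit header carrying the key in the upper 16 bits and the value length in dwords in the lower 16 bits, followed by the value dwords. A minimal sketch of encoding the SCHED_IF_IDLE policy documented above, assuming the GUC_KLV_0_KEY/GUC_KLV_0_LEN masks and the GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY/_LEN constants from this header:

```c
#include <linux/bitfield.h>

/*
 * Hypothetical sketch: fill one KLV entry toggling strict scheduling.
 * The constant names are assumed from abi/guc_klvs_abi.h.
 */
static void fill_sched_if_idle_klv(u32 *klv, bool enable)
{
	klv[0] = FIELD_PREP(GUC_KLV_0_KEY, GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY) |
		 FIELD_PREP(GUC_KLV_0_LEN, GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_LEN);
	klv[1] = enable;	/* value dword: 1 = strict scheduling on */
}
```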
* @@ -207,7 +207,7 @@ enum { * of and this will never be perfectly-exact (accumulated nano-second * granularity) since the GPUs clock time runs off a different crystal * from the CPUs clock. Changing this KLV on a VF that is currently - * running a context wont take effect until a new context is scheduled in. + * running a context won't take effect until a new context is scheduled in. * That said, when the PF is changing this value from 0x0 to * a non-zero value, it might never take effect if the VF is running an * infinitely long compute or shader kernel. In such a scenario, the @@ -227,7 +227,7 @@ enum { * HW is capable and this will never be perfectly-exact (accumulated * nano-second granularity) since the GPUs clock time runs off a * different crystal from the CPUs clock. Changing this KLV on a VF - * that is currently running a context wont take effect until a new + * that is currently running a context won't take effect until a new * context is scheduled in. * That said, when the PF is changing this value from 0x0 to * a non-zero value, it might never take effect if the VF is running an diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h index 9c4cf050059a..41d39d67817a 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h @@ -1,3 +1,8 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + #ifndef _I915_GEM_STOLEN_H_ #define _I915_GEM_STOLEN_H_ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h index 686c39f320e4..0382beb4035b 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h @@ -117,19 +117,10 @@ __intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t i915_reg, unsigned int slow_timeout_ms, u32 *out_value) { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); - bool atomic; - - /* - * Replicate the behavior from i915 here, in which sleep is not - * performed if slow_timeout_ms == 0. This is necessary because - * of some paths in display code where waits are done in atomic - * context. - */ - atomic = !slow_timeout_ms && fast_timeout_us > 0; return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value, fast_timeout_us + 1000 * slow_timeout_ms, - out_value, atomic); + out_value, false); } static inline u32 intel_uncore_read_fw(struct intel_uncore *uncore, diff --git a/drivers/gpu/drm/xe/display/ext/i915_irq.c b/drivers/gpu/drm/xe/display/ext/i915_irq.c index a7dbc6554d69..0c0f4533c34f 100644 --- a/drivers/gpu/drm/xe/display/ext/i915_irq.c +++ b/drivers/gpu/drm/xe/display/ext/i915_irq.c @@ -64,7 +64,7 @@ bool intel_irqs_enabled(struct xe_device *xe) * But at this point the xe irq is better protected against races, * although the full solution would be protecting the i915 side. 
*/ - return xe->irq.enabled; + return atomic_read(&xe->irq.enabled); } void intel_synchronize_irq(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 7c78496e6213..d86219dedde2 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -83,6 +83,8 @@ #define RING_IMR(base) XE_REG((base) + 0xa8) #define RING_INT_STATUS_RPT_PTR(base) XE_REG((base) + 0xac) +#define CS_INT_VEC(base) XE_REG((base) + 0x1b8) + #define RING_EIR(base) XE_REG((base) + 0xb0) #define RING_EMR(base) XE_REG((base) + 0xb4) #define RING_ESR(base) XE_REG((base) + 0xb8) @@ -138,6 +140,7 @@ #define RING_MODE(base) XE_REG((base) + 0x29c) #define GFX_DISABLE_LEGACY_MODE REG_BIT(3) +#define GFX_MSIX_INTERRUPT_ENABLE REG_BIT(13) #define RING_TIMESTAMP(base) XE_REG((base) + 0x358) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index fc74de339f02..096859072396 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -221,6 +221,9 @@ #define MIRROR_FUSE1 XE_REG(0x911c) +#define MIRROR_L3BANK_ENABLE XE_REG(0x9130) +#define XE3_L3BANK_ENABLE REG_GENMASK(31, 0) + #define XELP_EU_ENABLE XE_REG(0x9134) /* "_DISABLE" on Xe_LP */ #define XELP_EU_MASK REG_GENMASK(7, 0) #define XELP_GT_SLICE_ENABLE XE_REG(0x9138) @@ -410,12 +413,6 @@ #define XE2LPM_SCRATCH3_LBCF XE_REG_MCR(0xb654) -#define XE2LPM_L3SQCREG2 XE_REG_MCR(0xb604) - -#define XE2LPM_L3SQCREG3 XE_REG_MCR(0xb608) - -#define XE2LPM_SCRATCH3_LBCF XE_REG_MCR(0xb654) - #define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658) #define XE2_TDF_CTRL XE_REG(0xb418) @@ -506,6 +503,9 @@ #define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) #define DIS_ATOMIC_CHAINING_TYPED_WRITES REG_BIT(3) +#define TDL_CHICKEN XE_REG_MCR(0xe5f4, XE_REG_OPTION_MASKED) +#define QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE REG_BIT(12) + #define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8) #define DISABLE_D8_D16_COASLESCE REG_BIT(30) #define WR_REQ_CHAINING_DIS REG_BIT(26) diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 045dfd09db99..57944f90bbf6 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -25,6 +25,9 @@ #define CTX_INT_SRC_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 3) #define CTX_INT_SRC_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 4) +#define CTX_CS_INT_VEC_REG 0x5a +#define CTX_CS_INT_VEC_DATA (CTX_CS_INT_VEC_REG + 1) + #define INDIRECT_CTX_RING_HEAD (0x02 + 1) #define INDIRECT_CTX_RING_TAIL (0x04 + 1) #define INDIRECT_CTX_RING_START (0x06 + 1) diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index a9b0091cb7ee..a79ad2da070c 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -41,14 +41,6 @@ #define OAG_OABUFFER XE_REG(0xdb08) #define OABUFFER_SIZE_MASK REG_GENMASK(5, 3) -#define OABUFFER_SIZE_128K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 0) -#define OABUFFER_SIZE_256K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 1) -#define OABUFFER_SIZE_512K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 2) -#define OABUFFER_SIZE_1M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 3) -#define OABUFFER_SIZE_2M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 4) -#define OABUFFER_SIZE_4M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 5) -#define OABUFFER_SIZE_8M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 6) -#define OABUFFER_SIZE_16M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 7) #define OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT 
*/ #define OAG_OACONTROL XE_REG(0xdaf4) @@ -59,10 +51,15 @@ /* Common to all OA units */ #define OA_OACONTROL_REPORT_BC_MASK REG_GENMASK(9, 9) #define OA_OACONTROL_COUNTER_SIZE_MASK REG_GENMASK(8, 8) +#define OAG_OACONTROL_USED_BITS \ + (OAG_OACONTROL_OA_PES_DISAG_EN | OAG_OACONTROL_OA_CCS_SELECT_MASK | \ + OAG_OACONTROL_OA_COUNTER_SEL_MASK | OAG_OACONTROL_OA_COUNTER_ENABLE | \ + OA_OACONTROL_REPORT_BC_MASK | OA_OACONTROL_COUNTER_SIZE_MASK) #define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED) #define OAG_OA_DEBUG_DISABLE_MMIO_TRG REG_BIT(14) #define OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL REG_BIT(13) +#define OAG_OA_DEBUG_BUF_SIZE_SELECT REG_BIT(12) #define OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL REG_BIT(8) #define OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL REG_BIT(7) #define OAG_OA_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6) @@ -85,6 +82,8 @@ #define OAM_CONTEXT_CONTROL_OFFSET (0x1bc) #define OAM_CONTROL_OFFSET (0x194) #define OAM_CONTROL_COUNTER_SEL_MASK REG_GENMASK(3, 1) +#define OAM_OACONTROL_USED_BITS \ + (OAM_CONTROL_COUNTER_SEL_MASK | OAG_OACONTROL_OA_COUNTER_ENABLE) #define OAM_DEBUG_OFFSET (0x198) #define OAM_STATUS_OFFSET (0x19c) #define OAM_MMIO_TRG_OFFSET (0x1d0) diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h b/drivers/gpu/drm/xe/regs/xe_pmt.h new file mode 100644 index 000000000000..f45abcd96ba8 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_pmt.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ +#ifndef _XE_PMT_H_ +#define _XE_PMT_H_ + +#define SOC_BASE 0x280000 + +#define BMG_PMT_BASE_OFFSET 0xDB000 +#define BMG_DISCOVERY_OFFSET (SOC_BASE + BMG_PMT_BASE_OFFSET) + +#define BMG_TELEMETRY_BASE_OFFSET 0xE0000 +#define BMG_TELEMETRY_OFFSET (SOC_BASE + BMG_TELEMETRY_BASE_OFFSET) + +#define SG_REMAP_INDEX1 XE_REG(SOC_BASE + 0x08) +#define SG_REMAP_BITS REG_GENMASK(31, 24) + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h index 51fd40ffafcb..89716172fbb8 100644 --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -13,7 +13,7 @@ /** * struct xe_reg - Register definition * - * Register defintion to be used by the individual register. Although the same + * Register definition to be used by the individual register. Although the same * definition is used for xe_reg and xe_reg_mcr, they use different internal * APIs for accesses. */ @@ -21,7 +21,7 @@ struct xe_reg { union { struct { /** @addr: address */ - u32 addr:28; + u32 addr:22; /** * @masked: register is "masked", with upper 16bits used * to identify the bits that are updated on the lower @@ -41,10 +41,6 @@ struct xe_reg { * @vf: register is accessible from the Virtual Function. */ u32 vf:1; - /** - * @ext: access MMIO extension space for current register. - */ - u32 ext:1; }; /** @raw: Raw value with both address and options */ u32 raw; @@ -111,16 +107,6 @@ struct xe_reg_mcr { */ #define XE_REG(r_, ...) ((const struct xe_reg)XE_REG_INITIALIZER(r_, ##__VA_ARGS__)) -/** - * XE_REG_EXT - Create a struct xe_reg from extension offset and additional - * flags - * @r_: Register extension offset - * @...: Additional options like access mode. See struct xe_reg for available - * options. - */ -#define XE_REG_EXT(r_, ...) 
\ - ((const struct xe_reg)XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .ext = 1)) - /** * XE_REG_MCR - Create a struct xe_reg_mcr from offset and additional flags * @r_: Register offset diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 3293172b0128..6cf282618836 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -44,12 +44,16 @@ #define MTL_RP_STATE_CAP XE_REG(0x138000) +#define MTL_GT_RPA_FREQUENCY XE_REG(0x138008) #define MTL_GT_RPE_FREQUENCY XE_REG(0x13800c) #define MTL_MEDIAP_STATE_CAP XE_REG(0x138020) #define MTL_RPN_CAP_MASK REG_GENMASK(24, 16) #define MTL_RP0_CAP_MASK REG_GENMASK(8, 0) +#define MTL_MPA_FREQUENCY XE_REG(0x138028) +#define MTL_RPA_MASK REG_GENMASK(8, 0) + #define MTL_MPE_FREQUENCY XE_REG(0x13802c) #define MTL_RPE_MASK REG_GENMASK(8, 0) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 1f8c49679878..6795d1d916e4 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include @@ -264,10 +264,9 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc * however seems quite fragile not to also restart the GT. Try * to do that here by triggering a GT reset. */ - for_each_gt(__gt, xe, id) { - xe_gt_reset_async(__gt); - flush_work(&__gt->reset.worker); - } + for_each_gt(__gt, xe, id) + xe_gt_reset(__gt); + if (err) { KUNIT_FAIL(test, "restore kernel err=%pe\n", ERR_PTR(err)); @@ -606,8 +605,6 @@ static void xe_bo_shrink_kunit(struct kunit *test) static struct kunit_case xe_bo_tests[] = { KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param), KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param), - KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param, - {.speed = KUNIT_SPEED_SLOW}), {} }; @@ -618,3 +615,17 @@ struct kunit_suite xe_bo_test_suite = { .init = xe_kunit_helper_xe_device_live_test_init, }; EXPORT_SYMBOL_IF_KUNIT(xe_bo_test_suite); + +static struct kunit_case xe_bo_shrink_test[] = { + KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param, + {.speed = KUNIT_SPEED_SLOW}), + {} +}; + +VISIBLE_IF_KUNIT +struct kunit_suite xe_bo_shrink_test_suite = { + .name = "xe_bo_shrink", + .test_cases = xe_bo_shrink_test, + .init = xe_kunit_helper_xe_device_live_test_init, +}; +EXPORT_SYMBOL_IF_KUNIT(xe_bo_shrink_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c new file mode 100644 index 000000000000..6faffcd74869 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include +#include +#include + +#include "xe_device.h" +#include "xe_ggtt.h" +#include "xe_guc_ct.h" +#include "xe_kunit_helpers.h" +#include "xe_pci_test.h" + +#define DUT_GGTT_START SZ_1M +#define DUT_GGTT_SIZE SZ_2M + +static struct xe_bo *replacement_xe_managed_bo_create_pin_map(struct xe_device *xe, + struct xe_tile *tile, + size_t size, u32 flags) +{ + struct kunit *test = kunit_get_current_test(); + struct xe_bo *bo; + void *buf; + + bo = drmm_kzalloc(&xe->drm, sizeof(*bo), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo); + + buf = drmm_kzalloc(&xe->drm, size, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf); + + bo->tile = tile; + bo->ttm.bdev = &xe->ttm; + bo->size = size; + iosys_map_set_vaddr(&bo->vmap, buf); + + if (flags & 
XE_BO_FLAG_GGTT) { + struct xe_ggtt *ggtt = tile->mem.ggtt; + + bo->ggtt_node[tile->id] = xe_ggtt_node_init(ggtt); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo->ggtt_node[tile->id]); + + KUNIT_ASSERT_EQ(test, 0, + drm_mm_insert_node_in_range(&ggtt->mm, + &bo->ggtt_node[tile->id]->base, + bo->size, SZ_4K, + 0, 0, U64_MAX, 0)); + } + + return bo; +} + +static int guc_buf_test_init(struct kunit *test) +{ + struct xe_pci_fake_data fake = { + .sriov_mode = XE_SRIOV_MODE_PF, + .platform = XE_TIGERLAKE, /* some random platform */ + .subplatform = XE_SUBPLATFORM_NONE, + }; + struct xe_ggtt *ggtt; + struct xe_guc *guc; + + test->priv = &fake; + xe_kunit_helper_xe_device_test_init(test); + + ggtt = xe_device_get_root_tile(test->priv)->mem.ggtt; + guc = &xe_device_get_gt(test->priv, 0)->uc.guc; + + drm_mm_init(&ggtt->mm, DUT_GGTT_START, DUT_GGTT_SIZE); + mutex_init(&ggtt->lock); + + kunit_activate_static_stub(test, xe_managed_bo_create_pin_map, + replacement_xe_managed_bo_create_pin_map); + + KUNIT_ASSERT_EQ(test, 0, xe_guc_buf_cache_init(&guc->buf)); + + test->priv = &guc->buf; + return 0; +} + +static void test_smallest(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf buf; + + buf = xe_guc_buf_reserve(cache, 1); + KUNIT_ASSERT_TRUE(test, xe_guc_buf_is_valid(buf)); + KUNIT_EXPECT_NOT_NULL(test, xe_guc_buf_cpu_ptr(buf)); + KUNIT_EXPECT_NE(test, 0, xe_guc_buf_gpu_addr(buf)); + KUNIT_EXPECT_LE(test, DUT_GGTT_START, xe_guc_buf_gpu_addr(buf)); + KUNIT_EXPECT_GT(test, DUT_GGTT_START + DUT_GGTT_SIZE, xe_guc_buf_gpu_addr(buf)); + xe_guc_buf_release(buf); +} + +static void test_largest(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf buf; + + buf = xe_guc_buf_reserve(cache, xe_guc_buf_cache_dwords(cache)); + KUNIT_ASSERT_TRUE(test, xe_guc_buf_is_valid(buf)); + KUNIT_EXPECT_NOT_NULL(test, xe_guc_buf_cpu_ptr(buf)); + KUNIT_EXPECT_NE(test, 0, xe_guc_buf_gpu_addr(buf)); + KUNIT_EXPECT_LE(test, DUT_GGTT_START, xe_guc_buf_gpu_addr(buf)); + KUNIT_EXPECT_GT(test, DUT_GGTT_START + DUT_GGTT_SIZE, xe_guc_buf_gpu_addr(buf)); + xe_guc_buf_release(buf); +} + +static void test_granular(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf *bufs; + int n, dwords; + + dwords = xe_guc_buf_cache_dwords(cache); + bufs = kunit_kcalloc(test, dwords, sizeof(*bufs), GFP_KERNEL); + KUNIT_EXPECT_NOT_NULL(test, bufs); + + for (n = 0; n < dwords; n++) + bufs[n] = xe_guc_buf_reserve(cache, 1); + + for (n = 0; n < dwords; n++) + KUNIT_EXPECT_TRUE_MSG(test, xe_guc_buf_is_valid(bufs[n]), "n=%d", n); + + for (n = 0; n < dwords; n++) + xe_guc_buf_release(bufs[n]); +} + +static void test_unique(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf *bufs; + int n, m, dwords; + + dwords = xe_guc_buf_cache_dwords(cache); + bufs = kunit_kcalloc(test, dwords, sizeof(*bufs), GFP_KERNEL); + KUNIT_EXPECT_NOT_NULL(test, bufs); + + for (n = 0; n < dwords; n++) + bufs[n] = xe_guc_buf_reserve(cache, 1); + + for (n = 0; n < dwords; n++) { + for (m = n + 1; m < dwords; m++) { + KUNIT_EXPECT_PTR_NE_MSG(test, xe_guc_buf_cpu_ptr(bufs[n]), + xe_guc_buf_cpu_ptr(bufs[m]), "n=%d, m=%d", n, m); + KUNIT_ASSERT_NE_MSG(test, xe_guc_buf_gpu_addr(bufs[n]), + xe_guc_buf_gpu_addr(bufs[m]), "n=%d, m=%d", n, m); + } + } + + for (n = 0; n < dwords; n++) + xe_guc_buf_release(bufs[n]); +} + +static void test_overlap(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf b1, b2; + u32 dwords = 
xe_guc_buf_cache_dwords(cache) / 2; + u32 bytes = dwords * sizeof(u32); + void *p1, *p2; + u64 a1, a2; + + b1 = xe_guc_buf_reserve(cache, dwords); + b2 = xe_guc_buf_reserve(cache, dwords); + + p1 = xe_guc_buf_cpu_ptr(b1); + p2 = xe_guc_buf_cpu_ptr(b2); + + a1 = xe_guc_buf_gpu_addr(b1); + a2 = xe_guc_buf_gpu_addr(b2); + + KUNIT_EXPECT_PTR_NE(test, p1, p2); + if (p1 < p2) + KUNIT_EXPECT_LT(test, (uintptr_t)(p1 + bytes - 1), (uintptr_t)p2); + else + KUNIT_EXPECT_LT(test, (uintptr_t)(p2 + bytes - 1), (uintptr_t)p1); + + KUNIT_EXPECT_NE(test, a1, a2); + if (a1 < a2) + KUNIT_EXPECT_LT(test, a1 + bytes - 1, a2); + else + KUNIT_EXPECT_LT(test, a2 + bytes - 1, a1); + + xe_guc_buf_release(b1); + xe_guc_buf_release(b2); +} + +static void test_reusable(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf b1, b2; + void *p1; + u64 a1; + + b1 = xe_guc_buf_reserve(cache, xe_guc_buf_cache_dwords(cache)); + KUNIT_ASSERT_TRUE(test, xe_guc_buf_is_valid(b1)); + KUNIT_EXPECT_NOT_NULL(test, p1 = xe_guc_buf_cpu_ptr(b1)); + KUNIT_EXPECT_NE(test, 0, a1 = xe_guc_buf_gpu_addr(b1)); + xe_guc_buf_release(b1); + + b2 = xe_guc_buf_reserve(cache, xe_guc_buf_cache_dwords(cache)); + KUNIT_EXPECT_PTR_EQ(test, p1, xe_guc_buf_cpu_ptr(b2)); + KUNIT_EXPECT_EQ(test, a1, xe_guc_buf_gpu_addr(b2)); + xe_guc_buf_release(b2); +} + +static void test_too_big(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf buf; + + buf = xe_guc_buf_reserve(cache, xe_guc_buf_cache_dwords(cache) + 1); + KUNIT_EXPECT_FALSE(test, xe_guc_buf_is_valid(buf)); + xe_guc_buf_release(buf); /* shouldn't crash */ +} + +static void test_flush(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf buf; + const u32 dwords = xe_guc_buf_cache_dwords(cache); + const u32 bytes = dwords * sizeof(u32); + u32 *s, *p, *d; + int n; + + KUNIT_ASSERT_NOT_NULL(test, s = kunit_kcalloc(test, dwords, sizeof(u32), GFP_KERNEL)); + KUNIT_ASSERT_NOT_NULL(test, d = kunit_kcalloc(test, dwords, sizeof(u32), GFP_KERNEL)); + + for (n = 0; n < dwords; n++) + s[n] = n; + + buf = xe_guc_buf_reserve(cache, dwords); + KUNIT_ASSERT_TRUE(test, xe_guc_buf_is_valid(buf)); + KUNIT_ASSERT_NOT_NULL(test, p = xe_guc_buf_cpu_ptr(buf)); + KUNIT_EXPECT_PTR_NE(test, p, s); + KUNIT_EXPECT_PTR_NE(test, p, d); + + memcpy(p, s, bytes); + KUNIT_EXPECT_NE(test, 0, xe_guc_buf_flush(buf)); + + iosys_map_memcpy_from(d, &cache->sam->bo->vmap, 0, bytes); + KUNIT_EXPECT_MEMEQ(test, s, d, bytes); + + xe_guc_buf_release(buf); +} + +static void test_lookup(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf buf; + u32 dwords; + u64 addr; + u32 *p; + int n; + + dwords = xe_guc_buf_cache_dwords(cache); + buf = xe_guc_buf_reserve(cache, dwords); + KUNIT_ASSERT_TRUE(test, xe_guc_buf_is_valid(buf)); + KUNIT_ASSERT_NOT_NULL(test, p = xe_guc_buf_cpu_ptr(buf)); + KUNIT_ASSERT_NE(test, 0, addr = xe_guc_buf_gpu_addr(buf)); + + KUNIT_EXPECT_EQ(test, 0, xe_guc_cache_gpu_addr_from_ptr(cache, p - 1, sizeof(u32))); + KUNIT_EXPECT_EQ(test, 0, xe_guc_cache_gpu_addr_from_ptr(cache, p + dwords, sizeof(u32))); + + for (n = 0; n < dwords; n++) + KUNIT_EXPECT_EQ_MSG(test, xe_guc_cache_gpu_addr_from_ptr(cache, p + n, sizeof(u32)), + addr + n * sizeof(u32), "n=%d", n); + + xe_guc_buf_release(buf); +} + +static void test_data(struct kunit *test) +{ + static const u32 data[] = { 1, 2, 3, 4, 5, 6 }; + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf buf; + void *p; + + buf = 
xe_guc_buf_from_data(cache, data, sizeof(data)); + KUNIT_ASSERT_TRUE(test, xe_guc_buf_is_valid(buf)); + KUNIT_ASSERT_NOT_NULL(test, p = xe_guc_buf_cpu_ptr(buf)); + KUNIT_EXPECT_MEMEQ(test, p, data, sizeof(data)); + + xe_guc_buf_release(buf); +} + +static void test_class(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + u32 dwords = xe_guc_buf_cache_dwords(cache); + + { + CLASS(xe_guc_buf, buf)(cache, dwords); + KUNIT_ASSERT_TRUE(test, xe_guc_buf_is_valid(buf)); + KUNIT_EXPECT_NOT_NULL(test, xe_guc_buf_cpu_ptr(buf)); + KUNIT_EXPECT_NE(test, 0, xe_guc_buf_gpu_addr(buf)); + KUNIT_EXPECT_LE(test, DUT_GGTT_START, xe_guc_buf_gpu_addr(buf)); + KUNIT_EXPECT_GT(test, DUT_GGTT_START + DUT_GGTT_SIZE, xe_guc_buf_gpu_addr(buf)); + } + + { + CLASS(xe_guc_buf, buf)(cache, dwords); + KUNIT_ASSERT_TRUE(test, xe_guc_buf_is_valid(buf)); + KUNIT_EXPECT_NOT_NULL(test, xe_guc_buf_cpu_ptr(buf)); + KUNIT_EXPECT_NE(test, 0, xe_guc_buf_gpu_addr(buf)); + KUNIT_EXPECT_LE(test, DUT_GGTT_START, xe_guc_buf_gpu_addr(buf)); + KUNIT_EXPECT_GT(test, DUT_GGTT_START + DUT_GGTT_SIZE, xe_guc_buf_gpu_addr(buf)); + } +} + +static struct kunit_case guc_buf_test_cases[] = { + KUNIT_CASE(test_smallest), + KUNIT_CASE(test_largest), + KUNIT_CASE(test_granular), + KUNIT_CASE(test_unique), + KUNIT_CASE(test_overlap), + KUNIT_CASE(test_reusable), + KUNIT_CASE(test_too_big), + KUNIT_CASE(test_flush), + KUNIT_CASE(test_lookup), + KUNIT_CASE(test_data), + KUNIT_CASE(test_class), + {} +}; + +static struct kunit_suite guc_buf_suite = { + .name = "guc_buf", + .test_cases = guc_buf_test_cases, + .init = guc_buf_test_init, +}; + +kunit_test_suites(&guc_buf_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c index 5f14737c8210..af817419e077 100644 --- a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c +++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c @@ -6,11 +6,13 @@ #include extern struct kunit_suite xe_bo_test_suite; +extern struct kunit_suite xe_bo_shrink_test_suite; extern struct kunit_suite xe_dma_buf_test_suite; extern struct kunit_suite xe_migrate_test_suite; extern struct kunit_suite xe_mocs_test_suite; kunit_test_suite(xe_bo_test_suite); +kunit_test_suite(xe_bo_shrink_test_suite); kunit_test_suite(xe_dma_buf_test_suite); kunit_test_suite(xe_migrate_test_suite); kunit_test_suite(xe_mocs_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index 6f9b7a266b41..ef1e5256c56a 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -58,7 +58,7 @@ static void read_l3cc_table(struct xe_gt *gt, mocs_dbg(gt, "reg_val=0x%x\n", reg_val); } else { - /* Just re-use value read on previous iteration */ + /* Just reuse value read on previous iteration */ reg_val >>= 16; } @@ -162,8 +162,7 @@ static int mocs_reset_test_run_device(struct xe_device *xe) if (flags & HAS_LNCF_MOCS) read_l3cc_table(gt, &mocs.table); - xe_gt_reset_async(gt); - flush_work(&gt->reset.worker); + xe_gt_reset(gt); kunit_info(test, "mocs_reset_test after reset\n"); if (flags & HAS_GLOBAL_MOCS) diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index ef777dbdf4ec..9570672fce33 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -41,7 +41,7 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm) /* * We need to allocate space for the requested number of dwords, * one additional MI_BATCH_BUFFER_END dword, and additional buffer - * space to accomodate the platform-specific hardware
prefetch + * space to accommodate the platform-specific hardware prefetch * requirements. */ bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool, diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 283cd0294570..b6e77afb58f9 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -15,6 +15,8 @@ #include #include +#include + #include "xe_device.h" #include "xe_dma_buf.h" #include "xe_drm_client.h" @@ -713,6 +715,21 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, goto out; } + /* Reject BO eviction if BO is bound to current VM. */ + if (evict && ctx->resv) { + struct drm_gpuvm_bo *vm_bo; + + drm_gem_for_each_gpuvm_bo(vm_bo, &bo->ttm.base) { + struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); + + if (xe_vm_resv(vm) == ctx->resv && + xe_vm_in_preempt_fence_mode(vm)) { + ret = -EBUSY; + goto out; + } + } + } + /* * Failed multi-hop where the old_mem is still marked as * TTM_PL_FLAG_TEMPORARY, should just be a dummy move. @@ -733,7 +750,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, new_mem->mem_type == XE_PL_SYSTEM) { long timeout = dma_resv_wait_timeout(ttm_bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, - true, + false, MAX_SCHEDULE_TIMEOUT); if (timeout < 0) { ret = timeout; @@ -786,7 +803,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, * / resume, some of the pinned memory is required for the * device to resume / use the GPU to move other evicted memory * (user memory) around. This likely could be optimized a bit - * futher where we find the minimum set of pinned memory + * further where we find the minimum set of pinned memory * required for resume but for simplity doing a memcpy for all * pinned memory. */ @@ -857,8 +874,16 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, out: if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) && - ttm_bo->ttm) + ttm_bo->ttm) { + long timeout = dma_resv_wait_timeout(ttm_bo->base.resv, + DMA_RESV_USAGE_KERNEL, + false, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) + ret = timeout; + xe_tt_unmap_sg(ttm_bo->ttm); + } return ret; } @@ -867,7 +892,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory * @bo: The buffer object to move. * - * On successful completion, the object memory will be moved to sytem memory. + * On successful completion, the object memory will be moved to system memory. * * This is needed to for special handling of pinned VRAM object during * suspend-resume. @@ -1362,7 +1387,7 @@ static const struct drm_gem_object_funcs xe_gem_object_funcs = { /** * xe_bo_alloc - Allocate storage for a struct xe_bo * - * This funcition is intended to allocate storage to be used for input + * This function is intended to allocate storage to be used for input * to __xe_bo_create_locked(), in the case a pointer to the bo to be * created is needed before the call to __xe_bo_create_locked().
* If __xe_bo_create_locked ends up never to be called, then the @@ -1636,6 +1661,7 @@ __xe_bo_create_locked(struct xe_device *xe, } } + trace_xe_bo_create(bo); return bo; err_unlock_put_bo: @@ -1773,6 +1799,8 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile struct xe_bo *bo; int ret; + KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags); + bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags); if (IS_ERR(bo)) return bo; @@ -2255,9 +2283,26 @@ int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->flags)) + if (XE_IOCTL_DBG(xe, args->flags & + ~DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER)) return -EINVAL; + if (args->flags & DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER) { + if (XE_IOCTL_DBG(xe, !IS_DGFX(xe))) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, args->handle)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, PAGE_SIZE > SZ_4K)) + return -EINVAL; + + BUILD_BUG_ON(((XE_PCI_BARRIER_MMAP_OFFSET >> XE_PTE_SHIFT) + + SZ_4K) >= DRM_FILE_PAGE_OFFSET_START); + args->offset = XE_PCI_BARRIER_MMAP_OFFSET; + return 0; + } + gem_obj = drm_gem_object_lookup(file, args->handle); if (XE_IOCTL_DBG(xe, !gem_obj)) return -ENOENT; @@ -2404,7 +2449,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) * @force_alloc: Set force_alloc in ttm_operation_ctx * * On successful completion, the object memory will be moved to evict - * placement. Ths function blocks until the object has been fully moved. + * placement. This function blocks until the object has been fully moved. * * Return: 0 on success. Negative error code on failure. */ diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index d9386ab03140..04995c5ced32 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -75,6 +75,8 @@ #define XE_BO_PROPS_INVALID (-1) +#define XE_PCI_BARRIER_MMAP_OFFSET (0x50 << XE_PTE_SHIFT) + struct sg_table; struct xe_bo *xe_bo_alloc(void); diff --git a/drivers/gpu/drm/xe/xe_bo_doc.h b/drivers/gpu/drm/xe/xe_bo_doc.h index f57d440cc95a..25a884c64bf1 100644 --- a/drivers/gpu/drm/xe/xe_bo_doc.h +++ b/drivers/gpu/drm/xe/xe_bo_doc.h @@ -41,7 +41,7 @@ * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All * user BOs are evictable and user BOs are never pinned by XE. The allocation of - * the backing store can be defered from creation time until first use which is + * the backing store can be deferred from creation time until first use which is * either mmap, bind, or pagefault. 
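Returning to the DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER path added in xe_gem_mmap_offset_ioctl() above, a hypothetical userspace sketch (not part of the patch) that matches the checks the kernel enforces: handle must be 0, the device must be discrete, and the mapping is a single write-only page. The header path and error handling are assumptions:

```c
#include <sys/mman.h>
#include <xf86drm.h>
#include <drm/xe_drm.h>

/* Hypothetical helper: map the PCI barrier page, MAP_FAILED on error */
static void *xe_map_pci_barrier(int fd)
{
	struct drm_xe_gem_mmap_offset mmo = {
		.handle = 0,	/* must be 0 for the barrier, per the ioctl checks */
		.flags = DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER,
	};

	if (drmIoctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo))
		return MAP_FAILED;

	/* VM_READ/VM_EXEC mappings are rejected, so request PROT_WRITE only */
	return mmap(NULL, 4096, PROT_WRITE, MAP_SHARED, fd, mmo.offset);
}
```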
* * Private BOs diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 46dc9e4e3e46..5387e0456625 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -10,6 +10,7 @@ #include #include +#include #include #include "xe_device_types.h" diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 52cba5154d35..39fe485d2085 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -22,8 +22,8 @@ #include "xe_guc_log.h" #include "xe_guc_submit.h" #include "xe_hw_engine.h" -#include "xe_pm.h" #include "xe_module.h" +#include "xe_pm.h" #include "xe_sched_job.h" #include "xe_vm.h" @@ -48,7 +48,7 @@ * * **Coredump release**: * After a coredump is generated, it stays in kernel memory until released by - * userpace by writing anything to it, or after an internal timer expires. The + * userspace by writing anything to it, or after an internal timer expires. The * exact timeout may vary and should not be relied upon. Example to release * a coredump: * @@ -391,25 +391,25 @@ int xe_devcoredump_init(struct xe_device *xe) /** * xe_print_blob_ascii85 - print a BLOB to some useful location in ASCII85 * - * The output is split to multiple lines because some print targets, e.g. dmesg - * cannot handle arbitrarily long lines. Note also that printing to dmesg in - * piece-meal fashion is not possible, each separate call to drm_puts() has a - * line-feed automatically added! Therefore, the entire output line must be - * constructed in a local buffer first, then printed in one atomic output call. + * The output is split into multiple calls to drm_puts() because some print + * targets, e.g. dmesg, cannot handle arbitrarily long lines. These targets may + * add newlines, as is the case with dmesg: each drm_puts() call creates a + * separate line. * * There is also a scheduler yield call to prevent the 'task has been stuck for * 120s' kernel hang check feature from firing when printing to a slow target * such as dmesg over a serial port. * - * TODO: Add compression prior to the ASCII85 encoding to shrink huge buffers down. - * * @p: the printer object to output to * @prefix: optional prefix to add to output string + * @suffix: optional suffix to add at the end. 
0 disables it and is + * not added to the output, which is useful when using multiple calls + * to dump data to @p * @blob: the Binary Large OBject to dump out * @offset: offset in bytes to skip from the front of the BLOB, must be a multiple of sizeof(u32) * @size: the size in bytes of the BLOB, must be a multiple of sizeof(u32) */ -void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size) { const u32 *blob32 = (const u32 *)blob; @@ -417,7 +417,8 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, size_t line_pos = 0; #define DMESG_MAX_LINE_LEN 800 -#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */ + /* Always leave space for the suffix char and the \0 */ +#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\0" */ if (size & 3) drm_printf(p, "Size not word aligned: %zu", size); @@ -449,7 +450,6 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, line_pos += strlen(line_buff + line_pos); if ((line_pos + MIN_SPACE) >= DMESG_MAX_LINE_LEN) { - line_buff[line_pos++] = '\n'; line_buff[line_pos++] = 0; drm_puts(p, line_buff); @@ -461,10 +461,11 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, } } + if (suffix) + line_buff[line_pos++] = suffix; + if (line_pos) { - line_buff[line_pos++] = '\n'; line_buff[line_pos++] = 0; - drm_puts(p, line_buff); } diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h index 6a17e6d60102..5391a80a4d1b 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.h +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -29,7 +29,7 @@ static inline int xe_devcoredump_init(struct xe_device *xe) } #endif -void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size); #endif diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index bcb2907fb9e3..0460ce13a241 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -49,13 +49,16 @@ #include "xe_pat.h" #include "xe_pcode.h" #include "xe_pm.h" +#include "xe_pmu.h" #include "xe_query.h" #include "xe_sriov.h" +#include "xe_survivability_mode.h" #include "xe_tile.h" #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_sys_mgr.h" #include "xe_vm.h" #include "xe_vram.h" +#include "xe_vsec.h" #include "xe_wait_user_fence.h" #include "xe_wa.h" @@ -231,12 +234,117 @@ static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned lo #define xe_drm_compat_ioctl NULL #endif +static void barrier_open(struct vm_area_struct *vma) +{ + drm_dev_get(vma->vm_private_data); +} + +static void barrier_close(struct vm_area_struct *vma) +{ + drm_dev_put(vma->vm_private_data); +} + +static void barrier_release_dummy_page(struct drm_device *dev, void *res) +{ + struct page *dummy_page = (struct page *)res; + + __free_page(dummy_page); +} + +static vm_fault_t barrier_fault(struct vm_fault *vmf) +{ + struct drm_device *dev = vmf->vma->vm_private_data; + struct vm_area_struct *vma = vmf->vma; + vm_fault_t ret = VM_FAULT_NOPAGE; + pgprot_t prot; + int idx; + + prot = vm_get_page_prot(vma->vm_flags); + + if (drm_dev_enter(dev, &idx)) { + unsigned long pfn; + +#define LAST_DB_PAGE_OFFSET 0x7ff001 + pfn = PHYS_PFN(pci_resource_start(to_pci_dev(dev->dev), 0) + + LAST_DB_PAGE_OFFSET); + ret = vmf_insert_pfn_prot(vma, vma->vm_start, pfn, + 
pgprot_noncached(prot)); + drm_dev_exit(idx); + } else { + struct page *page; + + /* Allocate new dummy page to map all the VA range in this VMA to it*/ + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + return VM_FAULT_OOM; + + /* Set the page to be freed using drmm release action */ + if (drmm_add_action_or_reset(dev, barrier_release_dummy_page, page)) + return VM_FAULT_OOM; + + ret = vmf_insert_pfn_prot(vma, vma->vm_start, page_to_pfn(page), + prot); + } + + return ret; +} + +static const struct vm_operations_struct vm_ops_barrier = { + .open = barrier_open, + .close = barrier_close, + .fault = barrier_fault, +}; + +static int xe_pci_barrier_mmap(struct file *filp, + struct vm_area_struct *vma) +{ + struct drm_file *priv = filp->private_data; + struct drm_device *dev = priv->minor->dev; + struct xe_device *xe = to_xe_device(dev); + + if (!IS_DGFX(xe)) + return -EINVAL; + + if (vma->vm_end - vma->vm_start > SZ_4K) + return -EINVAL; + + if (is_cow_mapping(vma->vm_flags)) + return -EINVAL; + + if (vma->vm_flags & (VM_READ | VM_EXEC)) + return -EINVAL; + + vm_flags_clear(vma, VM_MAYREAD | VM_MAYEXEC); + vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO); + vma->vm_ops = &vm_ops_barrier; + vma->vm_private_data = dev; + drm_dev_get(vma->vm_private_data); + + return 0; +} + +static int xe_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_file *priv = filp->private_data; + struct drm_device *dev = priv->minor->dev; + + if (drm_dev_is_unplugged(dev)) + return -ENODEV; + + switch (vma->vm_pgoff) { + case XE_PCI_BARRIER_MMAP_OFFSET >> XE_PTE_SHIFT: + return xe_pci_barrier_mmap(filp, vma); + } + + return drm_gem_mmap(filp, vma); +} + static const struct file_operations xe_driver_fops = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release_noglobal, .unlocked_ioctl = xe_drm_ioctl, - .mmap = drm_gem_mmap, + .mmap = xe_mmap, .poll = drm_poll, .read = drm_read, .compat_ioctl = xe_drm_compat_ioctl, @@ -270,7 +378,6 @@ static struct drm_driver driver = { .fops = &xe_driver_fops, .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, @@ -324,7 +433,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->info.revid = pdev->revision; xe->info.force_execlist = xe_modparam.force_execlist; - spin_lock_init(&xe->irq.lock); + err = xe_irq_init(xe); + if (err) + goto err; init_waitqueue_head(&xe->ufence_wq); @@ -366,6 +477,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, goto err; } + err = drmm_mutex_init(&xe->drm, &xe->pmt.lock); + if (err) + goto err; + err = xe_display_create(xe); if (WARN_ON(err)) goto err; @@ -580,8 +695,12 @@ int xe_device_probe_early(struct xe_device *xe) update_device_info(xe); err = xe_pcode_probe_early(xe); - if (err) + if (err) { + if (xe_survivability_mode_required(xe)) + xe_survivability_mode_init(xe); + return err; + } err = wait_for_lmem_ready(xe); if (err) @@ -599,7 +718,7 @@ static int probe_has_flat_ccs(struct xe_device *xe) u32 reg; /* Always enabled/disabled, no runtime check to do */ - if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs) + if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs || IS_SRIOV_VF(xe)) return 0; gt = xe_root_mmio_gt(xe); @@ -727,6 +846,12 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err; + for_each_tile(tile, xe, id) { + err = xe_tile_init(tile); + if (err) + goto err; + } + for_each_gt(gt, xe, id) { last_gt = id; @@ -753,6 +878,8 @@ int xe_device_probe(struct
xe_device *xe) xe_oa_register(xe); + xe_pmu_register(&xe->pmu); + xe_debugfs_register(xe); xe_hwmon_register(xe); @@ -760,6 +887,8 @@ int xe_device_probe(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_sanitize_freq(gt); + xe_vsec_init(xe); + return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe); err_fini_display: @@ -990,7 +1119,7 @@ static void xe_device_wedged_fini(struct drm_device *drm, void *arg) * xe_device_declare_wedged - Declare device wedged * @xe: xe device instance * - * This is a final state that can only be cleared with a mudule + * This is a final state that can only be cleared with a module * re-probe (unbind + bind). * In this state every IOCTL will be blocked so the GT cannot be used. * In general it will be called upon any critical error such as gt reset diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index f1fbfe916867..fc3c2af3fb7f 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -157,8 +157,7 @@ static inline bool xe_device_has_sriov(struct xe_device *xe) static inline bool xe_device_has_msix(struct xe_device *xe) { - /* TODO: change this when MSI-X support is fully integrated */ - return false; + return xe->irq.msix.nvec > 0; } static inline bool xe_device_has_memirq(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 6a04f975ec16..89f532b67bc4 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -18,9 +18,11 @@ #include "xe_memirq_types.h" #include "xe_oa_types.h" #include "xe_platform_types.h" +#include "xe_pmu_types.h" #include "xe_pt_types.h" #include "xe_sriov_types.h" #include "xe_step_types.h" +#include "xe_survivability_mode_types.h" #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) #define TEST_VM_OPS_ERROR @@ -186,13 +188,6 @@ struct xe_tile { */ struct xe_mmio mmio; - /** - * @mmio_ext: MMIO-extension info for a tile. - * - * Each tile has its own additional 256MB (28-bit) MMIO-extension space. 
- */ - struct xe_mmio mmio_ext; - /** @mem: memory management info for tile */ struct { /** @@ -263,8 +258,6 @@ struct xe_device { const char *graphics_name; /** @info.media_name: media IP name */ const char *media_name; - /** @info.tile_mmio_ext_size: size of MMIO extension space, per-tile */ - u32 tile_mmio_ext_size; /** @info.graphics_verx100: graphics IP version */ u32 graphics_verx100; /** @info.media_verx100: media IP version */ @@ -314,8 +307,6 @@ struct xe_device { u8 has_heci_gscfi:1; /** @info.has_llc: Device has a shared CPU+GPU last level cache */ u8 has_llc:1; - /** @info.has_mmio_ext: Device has extra MMIO address range */ - u8 has_mmio_ext:1; /** @info.has_range_tlb_invalidation: Has range based TLB invalidations */ u8 has_range_tlb_invalidation:1; /** @info.has_sriov: Supports SR-IOV */ @@ -341,13 +332,24 @@ struct xe_device { u8 skip_pcode:1; } info; + /** @survivability: survivability information for device */ + struct xe_survivability survivability; + /** @irq: device interrupt state */ struct { /** @irq.lock: lock for processing irq's on this device */ spinlock_t lock; /** @irq.enabled: interrupts enabled on this device */ - bool enabled; + atomic_t enabled; + + /** @irq.msix: irq info for platforms that support MSI-X */ + struct { + /** @irq.msix.nvec: number of MSI-X interrupts */ + u16 nvec; + /** @irq.msix.indexes: used to allocate MSI-X indexes */ + struct xarray indexes; + } msix; } irq; /** @ttm: ttm device */ @@ -485,6 +487,12 @@ struct xe_device { struct mutex lock; } d3cold; + /** @pmt: Support the PMT driver callback interface */ + struct { + /** @pmt.lock: protect access for telemetry data */ + struct mutex lock; + } pmt; + /** * @pm_callback_task: Track the active task that is running in either * the runtime_suspend or runtime_resume callbacks. @@ -511,6 +519,9 @@ struct xe_device { int mode; } wedged; + /** @pmu: performance monitoring unit */ + struct xe_pmu pmu; + #ifdef TEST_VM_OPS_ERROR /** * @vm_inject_error_position: inject errors at different places in VM diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 298a587da7f1..a1710591a8d6 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -384,7 +384,7 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) * @p: The drm_printer ptr * @file: The drm_file ptr * - * This is callabck for drm fdinfo interface. Register this callback + * This is callback for drm fdinfo interface. Register this callback * in drm driver ops for show_fdinfo. * * Return: void diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h index d61650d4aa0b..d45b71426cc8 100644 --- a/drivers/gpu/drm/xe/xe_drv.h +++ b/drivers/gpu/drm/xe/xe_drv.h @@ -10,7 +10,6 @@ #define DRIVER_NAME "xe" #define DRIVER_DESC "Intel Xe Graphics" -#define DRIVER_DATE "20201103" /* Interface history: * diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 31cca938956f..df8ce550deb4 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -33,7 +33,7 @@ * * In XE we avoid all of this complication by not allowing a BO list to be * passed into an exec, using the dma-buf implicit sync uAPI, have binds as - * seperate operations, and using the DRM scheduler to flow control the ring. + * separate operations, and using the DRM scheduler to flow control the ring. * Let's deep dive on each of these.
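Before the deep dive in the comment below, a hypothetical userspace sketch (not part of the patch) of the resulting model: no BO list is passed to exec and ordering is expressed purely with fences. The fd, queue_id, batch_addr and syncobj names are assumptions, set up earlier via exec-queue create, VM bind and syncobj create:

```c
#include <stdint.h>
#include <xf86drm.h>
#include <drm/xe_drm.h>

/* Hypothetical sketch: submit one batch with an out-fence, no BO list */
static int submit(int fd, uint32_t queue_id, uint64_t batch_addr,
		  uint32_t syncobj)
{
	struct drm_xe_sync out_fence = {
		.type = DRM_XE_SYNC_TYPE_SYNCOBJ,
		.flags = DRM_XE_SYNC_FLAG_SIGNAL,	/* signal on completion */
		.handle = syncobj,
	};
	struct drm_xe_exec exec = {
		.exec_queue_id = queue_id,
		.num_syncs = 1,
		.syncs = (uintptr_t)&out_fence,
		.address = batch_addr,	/* VM address bound via DRM_IOCTL_XE_VM_BIND */
		.num_batch_buffer = 1,
	};

	return drmIoctl(fd, DRM_IOCTL_XE_EXEC, &exec);
}
```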
* * We can get away from a BO list by forcing the user to use in / out fences on diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index aab9e561153d..8948f50ee58f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -16,6 +17,7 @@ #include "xe_hw_engine_class_sysfs.h" #include "xe_hw_engine_group.h" #include "xe_hw_fence.h" +#include "xe_irq.h" #include "xe_lrc.h" #include "xe_macros.h" #include "xe_migrate.h" @@ -68,6 +70,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, q->gt = gt; q->class = hwe->class; q->width = width; + q->msix_vec = XE_IRQ_DEFAULT_MSIX; q->logical_mask = logical_mask; q->fence_irq = &gt->fence_irq[hwe->class]; q->ring_ops = gt->ring_ops[hwe->class]; @@ -117,7 +120,7 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q) } for (i = 0; i < q->width; ++i) { - q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K); + q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec); if (IS_ERR(q->lrc[i])) { err = PTR_ERR(q->lrc[i]); goto err_unlock; @@ -766,19 +769,21 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q) */ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) { - struct xe_file *xef; + struct xe_device *xe = gt_to_xe(q->gt); struct xe_lrc *lrc; u32 old_ts, new_ts; + int idx; /* - * Jobs that are run during driver load may use an exec_queue, but are - * not associated with a user xe file, so avoid accumulating busyness - * for kernel specific work. + * Jobs that are executed by the kernel don't have a corresponding + * xe_file and thus are not accounted. */ - if (!q->vm || !q->vm->xef) + if (!q->xef) return; - xef = q->vm->xef; + /* Synchronize with unbind while holding the xe file open */ + if (!drm_dev_enter(&xe->drm, &idx)) + return; /* * Only sample the first LRC. For parallel submission, all of them are @@ -790,7 +795,9 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) */ lrc = q->lrc[0]; new_ts = xe_lrc_update_timestamp(lrc, &old_ts); - xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; + q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; + + drm_dev_exit(idx); } /** diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 1158b6062a6c..5af5419cec7a 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -41,7 +41,7 @@ struct xe_exec_queue { /** @xef: Back pointer to xe file if this is user created exec queue */ struct xe_file *xef; - /** @gt: graphics tile this exec queue can submit to */ + /** @gt: GT structure this exec queue can submit to */ struct xe_gt *gt; /** * @hwe: A hardware of the same class.
May (physical engine) or may not @@ -63,6 +63,8 @@ struct xe_exec_queue { char name[MAX_FENCE_NAME_LEN]; /** @width: width (number BB submitted per exec) of this exec queue */ u16 width; + /** @msix_vec: MSI-X vector (for platforms that support it) */ + u16 msix_vec; /** @fence_irq: fence IRQ used to signal job completion */ struct xe_hw_fence_irq *fence_irq; diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index a8c416a48812..5ef96deaa881 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -17,6 +17,7 @@ #include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_hw_fence.h" +#include "xe_irq.h" #include "xe_lrc.h" #include "xe_macros.h" #include "xe_mmio.h" @@ -47,6 +48,7 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, struct xe_mmio *mmio = &gt->mmio; struct xe_device *xe = gt_to_xe(gt); u64 lrc_desc; + u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE); lrc_desc = xe_lrc_descriptor(lrc); @@ -80,8 +82,10 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base), xe_bo_ggtt_addr(hwe->hwsp)); xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base)); - xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), - _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); + + if (xe_device_has_msix(gt_to_xe(hwe->gt))) + ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE); + xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), ring_mode); xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base), lower_32_bits(lrc_desc)); @@ -265,7 +269,7 @@ struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, port->hwe = hwe; - port->lrc = xe_lrc_create(hwe, NULL, SZ_16K); + port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX); if (IS_ERR(port->lrc)) { err = PTR_ERR(port->lrc); goto err; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 05154f9de1a6..5fcb2b4c2c13 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -362,7 +362,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) /* * So we don't need to worry about 64K GGTT layout when dealing with - * scratch entires, rather keep the scratch page in system memory on + * scratch entries, rather keep the scratch page in system memory on * platforms where 64K pages are needed for VRAM.
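A side note on the masked-register write used in __start_lrc() above (illustrative, not part of the patch): RING_MODE is a "masked" register, where the upper 16 bits of a write select which of the lower 16 bits actually change, so individual bits can be updated without a read-modify-write. The kernel's _MASKED_BIT_* helpers expand roughly to:

```c
/* Roughly what the kernel's _MASKED_BIT_* helpers expand to */
#define EXAMPLE_MASKED_BIT_ENABLE(a)	(((a) << 16) | (a))	/* set bit(s) */
#define EXAMPLE_MASKED_BIT_DISABLE(a)	((a) << 16)		/* clear bit(s) */

/*
 * So the RING_MODE write above only touches GFX_DISABLE_LEGACY_MODE and,
 * when MSI-X is available, GFX_MSIX_INTERRUPT_ENABLE; all other bits in
 * the register are left untouched by the hardware.
 */
```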
*/ flags = XE_BO_FLAG_PINNED; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index d6744be01a68..01a4a852b8f4 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -387,6 +387,10 @@ int xe_gt_init_early(struct xe_gt *gt) xe_force_wake_init_gt(gt, gt_to_fw(gt)); spin_lock_init(&gt->global_invl_lock); + err = xe_gt_tlb_invalidation_init_early(gt); + if (err) + return err; + return 0; } @@ -528,8 +532,10 @@ static int all_fw_domain_init(struct xe_gt *gt) if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt); - if (IS_SRIOV_PF(gt_to_xe(gt))) + if (IS_SRIOV_PF(gt_to_xe(gt))) { + xe_gt_sriov_pf_init(gt); xe_gt_sriov_pf_init_hw(gt); + } xe_force_wake_put(gt_to_fw(gt), fw_ref); @@ -588,10 +594,6 @@ int xe_gt_init(struct xe_gt *gt) xe_hw_fence_irq_init(&gt->fence_irq[i]); } - err = xe_gt_tlb_invalidation_init(gt); - if (err) - return err; - err = xe_gt_pagefault_init(gt); if (err) return err; @@ -643,6 +645,9 @@ void xe_gt_mmio_init(struct xe_gt *gt) if (gt->info.type == XE_GT_TYPE_MEDIA) { gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET; gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH; + } else { + gt->mmio.adj_offset = 0; + gt->mmio.adj_limit = 0; } if (IS_SRIOV_VF(gt_to_xe(gt))) @@ -748,10 +753,8 @@ static int do_gt_restart(struct xe_gt *gt) if (err) return err; - for_each_hw_engine(hwe, gt, id) { + for_each_hw_engine(hwe, gt, id) xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); - xe_reg_sr_apply_whitelist(hwe); - } /* Get CCS mode in sync between sw/hw */ xe_gt_apply_ccs_mode(gt); diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 82b9b7f82fca..e504cc33ade4 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -37,7 +37,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt); /** * xe_gt_record_user_engines - save data related to engines available to - * usersapce + * userspace * @gt: GT structure * * Walk the available HW engines from gt->info.engine_mask and calculate data @@ -56,6 +56,31 @@ void xe_gt_sanitize(struct xe_gt *gt); int xe_gt_sanitize_freq(struct xe_gt *gt); void xe_gt_remove(struct xe_gt *gt); +/** + * xe_gt_wait_for_reset - wait for gt's async reset to finalize. + * @gt: GT structure + * Return: + * %true if it waited for the work to finish execution, + * %false if there was no scheduled reset or it was done. + */ +static inline bool xe_gt_wait_for_reset(struct xe_gt *gt) +{ + return flush_work(&gt->reset.worker); +} + +/** + * xe_gt_reset - perform synchronous reset + * @gt: GT structure + * Return: + * %true if it waited for the reset to finish, + * %false if there was no scheduled reset.
+ */ +static inline bool xe_gt_reset(struct xe_gt *gt) +{ + xe_gt_reset_async(gt); + return xe_gt_wait_for_reset(gt); +} + /** * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the * first that matches the same reset domain as @class diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index b6adfb9f2030..50fffc9ebf62 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -150,7 +150,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, xe_gt_info(gt, "Setting compute mode to %d\n", num_engines); gt->ccs_mode = num_engines; xe_gt_record_user_engines(gt); - xe_gt_reset_async(gt); + xe_gt_reset(gt); } mutex_unlock(&xe->drm.filelist_mutex); diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 3e8c351a0eab..e7792858b1e4 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -132,11 +132,9 @@ static int force_reset(struct xe_gt *gt, struct drm_printer *p) static int force_reset_sync(struct xe_gt *gt, struct drm_printer *p) { xe_pm_runtime_get(gt_to_xe(gt)); - xe_gt_reset_async(gt); + xe_gt_reset(gt); xe_pm_runtime_put(gt_to_xe(gt)); - flush_work(&gt->reset.worker); - return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 6bd39b2c5003..604bdc7c8173 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -115,6 +115,20 @@ static ssize_t rpe_freq_show(struct device *dev, } static DEVICE_ATTR_RO(rpe_freq); +static ssize_t rpa_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + + xe_pm_runtime_get(dev_to_xe(dev)); + freq = xe_guc_pc_get_rpa_freq(pc); + xe_pm_runtime_put(dev_to_xe(dev)); + + return sysfs_emit(buf, "%d\n", freq); +} +static DEVICE_ATTR_RO(rpa_freq); + static ssize_t rpn_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -202,6 +216,7 @@ static const struct attribute *freq_attrs[] = { &dev_attr_act_freq.attr, &dev_attr_cur_freq.attr, &dev_attr_rp0_freq.attr, + &dev_attr_rpa_freq.attr, &dev_attr_rpe_freq.attr, &dev_attr_rpn_freq.attr, &dev_attr_min_freq.attr, diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index fd80afeef56a..fbbace7b0b12 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -69,6 +69,8 @@ static u64 get_residency_ms(struct xe_gt_idle *gtidle, u64 cur_residency) { u64 delta, overflow_residency, prev_residency; + lockdep_assert_held(&gtidle->lock); + overflow_residency = BIT_ULL(32); /* @@ -122,10 +124,12 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) if (!xe_gt_is_media_type(gt)) gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE; - for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { - if ((gt->info.engine_mask & BIT(i))) - gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) | - VDN_MFXVDENC_POWERGATE_ENABLE(j)); + if (xe->info.platform != XE_DG1) { + for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { + if ((gt->info.engine_mask & BIT(i))) + gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) | + VDN_MFXVDENC_POWERGATE_ENABLE(j)); + } } fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); @@ -273,8 +277,21 @@ static ssize_t idle_status_show(struct device *dev, return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state)); } -static DEVICE_ATTR_RO(idle_status); +u64 xe_gt_idle_residency_msec(struct
xe_gt_idle *gtidle) +{ + struct xe_guc_pc *pc = gtidle_to_pc(gtidle); + u64 residency; + unsigned long flags; + + raw_spin_lock_irqsave(&gtidle->lock, flags); + residency = get_residency_ms(gtidle, gtidle->idle_residency(pc)); + raw_spin_unlock_irqrestore(&gtidle->lock, flags); + + return residency; +} + +static DEVICE_ATTR_RO(idle_status); static ssize_t idle_residency_ms_show(struct device *dev, struct device_attribute *attr, char *buff) { @@ -283,10 +300,10 @@ static ssize_t idle_residency_ms_show(struct device *dev, u64 residency; xe_pm_runtime_get(pc_to_xe(pc)); - residency = gtidle->idle_residency(pc); + residency = xe_gt_idle_residency_msec(gtidle); xe_pm_runtime_put(pc_to_xe(pc)); - return sysfs_emit(buff, "%llu\n", get_residency_ms(gtidle, residency)); + return sysfs_emit(buff, "%llu\n", residency); } static DEVICE_ATTR_RO(idle_residency_ms); @@ -329,6 +346,8 @@ int xe_gt_idle_init(struct xe_gt_idle *gtidle) if (!kobj) return -ENOMEM; + raw_spin_lock_init(&gtidle->lock); + if (xe_gt_is_media_type(gt)) { snprintf(gtidle->name, sizeof(gtidle->name), "gt%d-mc", gt->info.id); gtidle->idle_residency = xe_guc_pc_mc6_residency; diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h index 4455a6501cb0..591a01e181bc 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.h +++ b/drivers/gpu/drm/xe/xe_gt_idle.h @@ -17,5 +17,6 @@ void xe_gt_idle_disable_c6(struct xe_gt *gt); void xe_gt_idle_enable_pg(struct xe_gt *gt); void xe_gt_idle_disable_pg(struct xe_gt *gt); int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p); +u64 xe_gt_idle_residency_msec(struct xe_gt_idle *gtidle); #endif /* _XE_GT_IDLE_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_idle_types.h b/drivers/gpu/drm/xe/xe_gt_idle_types.h index b8b297a3f884..a3667c567f8a 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle_types.h +++ b/drivers/gpu/drm/xe/xe_gt_idle_types.h @@ -6,6 +6,7 @@ #ifndef _XE_GT_IDLE_SYSFS_TYPES_H_ #define _XE_GT_IDLE_SYSFS_TYPES_H_ +#include <linux/spinlock.h> #include <linux/types.h> struct xe_guc_pc; @@ -31,6 +32,8 @@ struct xe_gt_idle { u64 cur_residency; /** @prev_residency: previous residency counter */ u64 prev_residency; + /** @lock: Lock protecting idle residency counters */ + raw_spinlock_t lock; /** @idle_status: get the current idle state */ enum xe_gt_idle_state (*idle_status)(struct xe_guc_pc *pc); /** @idle_residency: get idle residency counter */ diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 5013d674e17d..605aad3554e7 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -341,7 +341,13 @@ static unsigned int dss_per_group(struct xe_gt *gt) return DIV_ROUND_UP(max_subslices, max_slices); fallback: - xe_gt_dbg(gt, "GuC hwconfig cannot provide dss/slice; using typical fallback values\n"); + /* + * Some older platforms don't have tables or don't have complete tables. + * Newer platforms should always have the required info. + */ + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 2000) + xe_gt_err(gt, "Slice/Subslice counts missing from hwconfig table; using typical fallback values\n"); + if (gt_to_xe(gt)->info.platform == XE_PVC) return 8; else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) @@ -371,7 +377,7 @@ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, * @group: steering group ID * @instance: steering instance ID * - * Return: the coverted DSS id. + * Return: the converted DSS id. 
*/ u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance) { @@ -550,9 +556,9 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt) * Returns true if the caller should steer to the @group/@instance values * returned. Returns false if the caller need not perform any steering */ -static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, - struct xe_reg_mcr reg_mcr, - u8 *group, u8 *instance) +bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, + struct xe_reg_mcr reg_mcr, + u8 *group, u8 *instance) { const struct xe_reg reg = to_xe_reg(reg_mcr); const struct xe_mmio_range *implicit_ranges; diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h index c0cd36021c24..bc06520befab 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.h +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h @@ -26,6 +26,10 @@ void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg, void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg, u32 value); +bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, + struct xe_reg_mcr reg_mcr, + u8 *group, u8 *instance); + void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p); void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance); u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance); diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 2606cd396df5..39344aeab112 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -10,6 +10,7 @@ #include #include +#include #include "abi/guc_actions_abi.h" #include "xe_bo.h" @@ -263,12 +264,13 @@ static void print_pagefault(struct xe_device *xe, struct pagefault *pf) "\tFaultType: %d\n" "\tAccessType: %d\n" "\tFaultLevel: %d\n" - "\tEngineClass: %d\n" + "\tEngineClass: %d %s\n" "\tEngineInstance: %d\n", pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr), lower_32_bits(pf->page_addr), pf->fault_type, pf->access_type, pf->fault_level, - pf->engine_class, pf->engine_instance); + pf->engine_class, xe_hw_engine_class_to_str(pf->engine_class), + pf->engine_instance); } #define PF_MSG_LEN_DW 4 diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h index 5dc71394372d..11da0228cea7 100644 --- a/drivers/gpu/drm/xe/xe_gt_printk.h +++ b/drivers/gpu/drm/xe/xe_gt_printk.h @@ -60,6 +60,21 @@ static inline void __xe_gt_printfn_info(struct drm_printer *p, struct va_format xe_gt_info(gt, "%pV", vaf); } +static inline void __xe_gt_printfn_dbg(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_gt *gt = p->arg; + struct drm_printer dbg; + + /* + * The original xe_gt_dbg() callsite annotations are useless here, + * redirect to the tweaked drm_dbg_printer() instead. + */ + dbg = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, NULL); + dbg.origin = p->origin; + + drm_printf(&dbg, "GT%u: %pV", gt->info.id, vaf); +} + /** * xe_gt_err_printer - Construct a &drm_printer that outputs to xe_gt_err() * @gt: the &xe_gt pointer to use in xe_gt_err() @@ -90,4 +105,20 @@ static inline struct drm_printer xe_gt_info_printer(struct xe_gt *gt) return p; } +/** + * xe_gt_dbg_printer - Construct a &drm_printer that outputs like xe_gt_dbg() + * @gt: the &xe_gt pointer to use in xe_gt_dbg() + * + * Return: The &drm_printer object. 
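+ *
+ * A minimal usage sketch (hypothetical caller and message):
+ *
+ *    struct drm_printer p = xe_gt_dbg_printer(gt);
+ *
+ *    drm_printf(&p, "%u banks enabled\n", count);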
+ */ +static inline struct drm_printer xe_gt_dbg_printer(struct xe_gt *gt) +{ + struct drm_printer p = { + .printfn = __xe_gt_printfn_dbg, + .arg = gt, + .origin = (const void *)_THIS_IP_, + }; + return p; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index e71fc3d2bda2..d66478deab98 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -15,7 +15,11 @@ #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_migration.h" #include "xe_gt_sriov_pf_service.h" +#include "xe_gt_sriov_printk.h" #include "xe_mmio.h" +#include "xe_pm.h" + +static void pf_worker_restart_func(struct work_struct *w); /* * VF's metadata is maintained in the flexible array where: @@ -41,6 +45,11 @@ static int pf_alloc_metadata(struct xe_gt *gt) return 0; } +static void pf_init_workers(struct xe_gt *gt) +{ + INIT_WORK(&gt->sriov.pf.workers.restart, pf_worker_restart_func); +} + /** * xe_gt_sriov_pf_init_early - Prepare SR-IOV PF data structures on PF. * @gt: the &xe_gt to initialize @@ -65,9 +74,24 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) if (err) return err; + pf_init_workers(gt); + return 0; } +/** + * xe_gt_sriov_pf_init - Prepare SR-IOV PF data structures on PF. + * @gt: the &xe_gt to initialize + * + * Late one-time initialization of the PF data. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_init(struct xe_gt *gt) +{ + return xe_gt_sriov_pf_migration_init(gt); +} + static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe) { return GRAPHICS_VERx100(xe) == 1200; @@ -90,7 +114,6 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) pf_enable_ggtt_guest_update(gt); xe_gt_sriov_pf_service_update(gt); - xe_gt_sriov_pf_migration_init(gt); } static u32 pf_get_vf_regs_stride(struct xe_device *xe) @@ -143,6 +166,35 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid) pf_clear_vf_scratch_regs(gt, vfid); } +static void pf_restart(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_pm_runtime_get(xe); + xe_gt_sriov_pf_config_restart(gt); + xe_gt_sriov_pf_control_restart(gt); + xe_pm_runtime_put(xe); + + xe_gt_sriov_dbg(gt, "restart completed\n"); +} + +static void pf_worker_restart_func(struct work_struct *w) +{ + struct xe_gt *gt = container_of(w, typeof(*gt), sriov.pf.workers.restart); + + pf_restart(gt); +} + +static void pf_queue_restart(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + + if (!queue_work(xe->sriov.wq, &gt->sriov.pf.workers.restart)) + xe_gt_sriov_dbg(gt, "restart already in queue!\n"); +} + /** * xe_gt_sriov_pf_restart - Restart SR-IOV support after a GT reset. 
* @gt: the &xe_gt @@ -151,6 +203,5 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid) */ void xe_gt_sriov_pf_restart(struct xe_gt *gt) { - xe_gt_sriov_pf_config_restart(gt); - xe_gt_sriov_pf_control_restart(gt); + pf_queue_restart(gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index 96fab779a906..f474509411c0 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -10,6 +10,7 @@ struct xe_gt; #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_init_early(struct xe_gt *gt); +int xe_gt_sriov_pf_init(struct xe_gt *gt); void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid); void xe_gt_sriov_pf_restart(struct xe_gt *gt); @@ -19,6 +20,11 @@ static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) return 0; } +static inline int xe_gt_sriov_pf_init(struct xe_gt *gt) +{ + return 0; +} + static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) { } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 65082f12f1a8..b1d994d65589 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -20,6 +20,7 @@ #include "xe_gt_sriov_pf_policy.h" #include "xe_gt_sriov_printk.h" #include "xe_guc.h" +#include "xe_guc_buf.h" #include "xe_guc_ct.h" #include "xe_guc_db_mgr.h" #include "xe_guc_fwif.h" @@ -71,48 +72,27 @@ static int pf_send_vf_cfg_reset(struct xe_gt *gt, u32 vfid) * Return: number of KLVs that were successfully parsed and saved, * negative error code on failure. */ -static int pf_send_vf_cfg_klvs(struct xe_gt *gt, u32 vfid, const u32 *klvs, u32 num_dwords) +static int pf_send_vf_buf_klvs(struct xe_gt *gt, u32 vfid, struct xe_guc_buf buf, u32 num_dwords) { - const u32 bytes = num_dwords * sizeof(u32); - struct xe_tile *tile = gt_to_tile(gt); - struct xe_device *xe = tile_to_xe(tile); struct xe_guc *guc = &gt->uc.guc; - struct xe_bo *bo; - int ret; - bo = xe_bo_create_pin_map(xe, tile, NULL, - ALIGN(bytes, PAGE_SIZE), - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); - if (IS_ERR(bo)) - return PTR_ERR(bo); - - xe_map_memcpy_to(xe, &bo->vmap, 0, klvs, bytes); - - ret = guc_action_update_vf_cfg(guc, vfid, xe_bo_ggtt_addr(bo), num_dwords); - - xe_bo_unpin_map_no_vm(bo); - - return ret; + return guc_action_update_vf_cfg(guc, vfid, xe_guc_buf_flush(buf), num_dwords); } /* * Return: 0 on success, -ENOKEY if some KLVs were not updated, -EPROTO if reply was malformed, * negative error code on failure. */ -static int pf_push_vf_cfg_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs, - const u32 *klvs, u32 num_dwords) +static int pf_push_vf_buf_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs, + struct xe_guc_buf buf, u32 num_dwords) { int ret; - xe_gt_assert(gt, num_klvs == xe_guc_klv_count(klvs, num_dwords)); - - ret = pf_send_vf_cfg_klvs(gt, vfid, klvs, num_dwords); + ret = pf_send_vf_buf_klvs(gt, vfid, buf, num_dwords); if (ret != num_klvs) { int err = ret < 0 ? ret : ret < num_klvs ? 
-ENOKEY : -EPROTO; + void *klvs = xe_guc_buf_cpu_ptr(buf); struct drm_printer p = xe_gt_info_printer(gt); char name[8]; @@ -125,13 +105,35 @@ static int pf_push_vf_cfg_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { struct drm_printer p = xe_gt_info_printer(gt); + void *klvs = xe_guc_buf_cpu_ptr(buf); + char name[8]; + xe_gt_sriov_info(gt, "pushed %s config with %u KLV%s:\n", + xe_sriov_function_name(vfid, name, sizeof(name)), + num_klvs, str_plural(num_klvs)); xe_guc_klv_print(klvs, num_dwords, &p); } return 0; } +/* + * Return: 0 on success, -ENOBUFS if no free buffer for the indirect data, + * negative error code on failure. + */ +static int pf_push_vf_cfg_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs, + const u32 *klvs, u32 num_dwords) +{ + CLASS(xe_guc_buf_from_data, buf)(&gt->uc.guc.buf, klvs, num_dwords * sizeof(u32)); + + xe_gt_assert(gt, num_klvs == xe_guc_klv_count(klvs, num_dwords)); + + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + return pf_push_vf_buf_klvs(gt, vfid, num_klvs, buf, num_dwords); +} + static int pf_push_vf_cfg_u32(struct xe_gt *gt, unsigned int vfid, u16 key, u32 value) { u32 klv[] = { @@ -262,7 +264,7 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool n += encode_config_ggtt(cfg, config, details); - if (details) { + if (details && config->num_ctxs) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_CONTEXT_ID); cfg[n++] = config->begin_ctx; } @@ -270,7 +272,7 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_NUM_CONTEXTS); cfg[n++] = config->num_ctxs; - if (details) { + if (details && config->num_dbs) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_DOORBELL_ID); cfg[n++] = config->begin_db; } @@ -304,16 +306,17 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - u32 max_cfg_dwords = SZ_4K / sizeof(u32); + u32 max_cfg_dwords = xe_guc_buf_cache_dwords(&gt->uc.guc.buf); + CLASS(xe_guc_buf, buf)(&gt->uc.guc.buf, max_cfg_dwords); u32 num_dwords; int num_klvs; u32 *cfg; int err; - cfg = kcalloc(max_cfg_dwords, sizeof(u32), GFP_KERNEL); - if (!cfg) - return -ENOMEM; + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + cfg = xe_guc_buf_cpu_ptr(buf); num_dwords = encode_config(cfg, config, true); xe_gt_assert(gt, num_dwords <= max_cfg_dwords); @@ -330,12 +333,31 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid) xe_gt_assert(gt, num_dwords <= max_cfg_dwords); num_klvs = xe_guc_klv_count(cfg, num_dwords); - err = pf_push_vf_cfg_klvs(gt, vfid, num_klvs, cfg, num_dwords); + err = pf_push_vf_buf_klvs(gt, vfid, num_klvs, buf, num_dwords); - kfree(cfg); return err; } +static int pf_push_vf_cfg(struct xe_gt *gt, unsigned int vfid, bool reset) +{ + int err = 0; + + xe_gt_assert(gt, vfid); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + if (reset) + err = pf_send_vf_cfg_reset(gt, vfid); + if (!err) + err = pf_push_full_vf_config(gt, vfid); + + return err; +} + +static int pf_refresh_vf_cfg(struct xe_gt *gt, unsigned int vfid) +{ + return pf_push_vf_cfg(gt, vfid, true); +} + static u64 pf_get_ggtt_alignment(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -432,6 +454,10 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) return err; pf_release_vf_config_ggtt(gt, config); + + err = 
pf_refresh_vf_cfg(gt, vfid); + if (unlikely(err)) + return err; } xe_gt_assert(gt, !xe_ggtt_node_allocated(config->ggtt_region)); @@ -757,6 +783,10 @@ static int pf_provision_vf_ctxs(struct xe_gt *gt, unsigned int vfid, u32 num_ctx return ret; pf_release_config_ctxs(gt, config); + + ret = pf_refresh_vf_cfg(gt, vfid); + if (unlikely(ret)) + return ret; } if (!num_ctxs) @@ -1054,6 +1084,10 @@ static int pf_provision_vf_dbs(struct xe_gt *gt, unsigned int vfid, u32 num_dbs) return ret; pf_release_config_dbs(gt, config); + + ret = pf_refresh_vf_cfg(gt, vfid); + if (unlikely(ret)) + return ret; } if (!num_dbs) @@ -2085,10 +2119,7 @@ int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh xe_gt_assert(gt, vfid); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); - if (refresh) - err = pf_send_vf_cfg_reset(gt, vfid); - if (!err) - err = pf_push_full_vf_config(gt, vfid); + err = pf_push_vf_cfg(gt, vfid, refresh); mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); if (unlikely(err)) { @@ -2120,7 +2151,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) valid_any = valid_any || (valid_ggtt && is_primary); if (IS_DGFX(xe)) { - bool valid_lmem = pf_get_vf_config_ggtt(primary_gt, vfid); + bool valid_lmem = pf_get_vf_config_lmem(primary_gt, vfid); valid_any = valid_any || (valid_lmem && is_primary); valid_all = valid_all && valid_lmem; @@ -2161,7 +2192,7 @@ bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid) * * This function can only be called on PF. * - * Return: mininum size of the buffer or the number of bytes saved, + * Return: minimum size of the buffer or the number of bytes saved, * or a negative error code on failure. */ ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *buf, size_t size) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c index c00fb354705f..4445f660e6d1 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c @@ -10,6 +10,7 @@ #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_policy.h" #include "xe_gt_sriov_printk.h" +#include "xe_guc_buf.h" #include "xe_guc_ct.h" #include "xe_guc_klv_helpers.h" #include "xe_pm.h" @@ -34,48 +35,28 @@ static int guc_action_update_vgt_policy(struct xe_guc *guc, u64 addr, u32 size) * Return: number of KLVs that were successfully parsed and saved, * negative error code on failure. */ -static int pf_send_policy_klvs(struct xe_gt *gt, const u32 *klvs, u32 num_dwords) +static int pf_send_policy_klvs(struct xe_gt *gt, struct xe_guc_buf buf, u32 num_dwords) { - const u32 bytes = num_dwords * sizeof(u32); - struct xe_tile *tile = gt_to_tile(gt); - struct xe_device *xe = tile_to_xe(tile); struct xe_guc *guc = &gt->uc.guc; - struct xe_bo *bo; - int ret; - - bo = xe_bo_create_pin_map(xe, tile, NULL, - ALIGN(bytes, PAGE_SIZE), - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT); - if (IS_ERR(bo)) - return PTR_ERR(bo); - - xe_map_memcpy_to(xe, &bo->vmap, 0, klvs, bytes); - ret = guc_action_update_vgt_policy(guc, xe_bo_ggtt_addr(bo), num_dwords); - - xe_bo_unpin_map_no_vm(bo); - - return ret; + return guc_action_update_vgt_policy(guc, xe_guc_buf_flush(buf), num_dwords); } /* * Return: 0 on success, -ENOKEY if some KLVs were not updated, -EPROTO if reply was malformed, * negative error code on failure. 
*/ -static int pf_push_policy_klvs(struct xe_gt *gt, u32 num_klvs, - const u32 *klvs, u32 num_dwords) +static int pf_push_policy_buf_klvs(struct xe_gt *gt, u32 num_klvs, + struct xe_guc_buf buf, u32 num_dwords) { int ret; - xe_gt_assert(gt, num_klvs == xe_guc_klv_count(klvs, num_dwords)); - - ret = pf_send_policy_klvs(gt, klvs, num_dwords); + ret = pf_send_policy_klvs(gt, buf, num_dwords); if (ret != num_klvs) { int err = ret < 0 ? ret : ret < num_klvs ? -ENOKEY : -EPROTO; struct drm_printer p = xe_gt_info_printer(gt); + void *klvs = xe_guc_buf_cpu_ptr(buf); xe_gt_sriov_notice(gt, "Failed to push %u policy KLV%s (%pe)\n", num_klvs, str_plural(num_klvs), ERR_PTR(err)); @@ -86,6 +67,23 @@ static int pf_push_policy_klvs(struct xe_gt *gt, u32 num_klvs, return 0; } +/* + * Return: 0 on success, -ENOBUFS if there is no free buffer for the indirect data, + * negative error code on failure. + */ +static int pf_push_policy_klvs(struct xe_gt *gt, u32 num_klvs, + const u32 *klvs, u32 num_dwords) +{ + CLASS(xe_guc_buf_from_data, buf)(&gt->uc.guc.buf, klvs, num_dwords * sizeof(u32)); + + xe_gt_assert(gt, num_klvs == xe_guc_klv_count(klvs, num_dwords)); + + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + return pf_push_policy_buf_klvs(gt, num_klvs, buf, num_dwords); +} + static int pf_push_policy_u32(struct xe_gt *gt, u16 key, u32 value) { u32 klv[] = { diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c index 924e75b94aec..6b5f849a0722 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -176,11 +176,32 @@ static const struct xe_reg ver_2000_runtime_regs[] = { TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ }; +static const struct xe_reg ver_3000_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + XEHP_FUSE4, /* _MMIO(0x9114) */ + MIRROR_FUSE3, /* _MMIO(0x9118) */ + MIRROR_FUSE1, /* _MMIO(0x911c) */ + MIRROR_L3BANK_ENABLE, /* _MMIO(0x9130) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ + XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */ + XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */ + XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */ + CTC_MODE, /* _MMIO(0xa26c) */ + HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ +}; + static const struct xe_reg *pick_runtime_regs(struct xe_device *xe, unsigned int *count) { const struct xe_reg *regs; - if (GRAPHICS_VERx100(xe) >= 2000) { + if (GRAPHICS_VERx100(xe) >= 3000) { + *count = ARRAY_SIZE(ver_3000_runtime_regs); + regs = ver_3000_runtime_regs; + } else if (GRAPHICS_VERx100(xe) >= 2000) { *count = ARRAY_SIZE(ver_2000_runtime_regs); regs = ver_2000_runtime_regs; } else if (GRAPHICS_VERx100(xe) >= 1270) { diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h index 0426b1a77069..a64a6835ad65 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h @@ -35,8 +35,17 @@ struct xe_gt_sriov_metadata { struct xe_gt_sriov_state_snapshot snapshot; }; +/** + * struct xe_gt_sriov_pf_workers - GT level workers used by the PF. + */ +struct xe_gt_sriov_pf_workers { + /** @restart: worker that executes actions post GT reset */ + struct work_struct restart; +}; + /** * struct xe_gt_sriov_pf - GT level PF virtualization data. + * @workers: workers data. * @service: service data. * @control: control data. 
* @policy: policy data. @@ -45,6 +54,7 @@ struct xe_gt_sriov_metadata { * @vfs: metadata for all VFs. */ struct xe_gt_sriov_pf { + struct xe_gt_sriov_pf_workers workers; struct xe_gt_sriov_pf_service service; struct xe_gt_sriov_pf_control control; struct xe_gt_sriov_pf_policy policy; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index cca5d5732802..6671030439fd 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -213,6 +213,9 @@ int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt) { int err; + if (!xe_device_uc_enabled(gt_to_xe(gt))) + return -ENODEV; + err = vf_reset_guc_state(gt); if (unlikely(err)) return err; diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index c7364a5aef8f..7a6c1d808e41 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -12,7 +12,7 @@ /** * xe_gt_stats_incr - Increments the specified stats counter - * @gt: graphics tile + * @gt: GT structure * @id: xe_gt_stats_id type id that needs to be incremented * @incr: value to be incremented with * @@ -32,7 +32,7 @@ static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { /** * xe_gt_stats_print_info - Print the GT stats - * @gt: graphics tile + * @gt: GT structure * @p: drm_printer where it will be printed out. * * This prints out all the available GT stats. diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 665927b80e9e..0a93831c0a02 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -106,15 +106,15 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) } /** - * xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state - * @gt: graphics tile + * xe_gt_tlb_invalidation_init_early - Initialize GT TLB invalidation state + * @gt: GT structure * * Initialize GT TLB invalidation state, purely software initialization, should * be called once during driver load. * * Return: 0 on success, negative error code on error. */ -int xe_gt_tlb_invalidation_init(struct xe_gt *gt) +int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt) { gt->tlb_invalidation.seqno = 1; INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences); @@ -128,7 +128,7 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) /** * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset - * @gt: graphics tile + * @gt: GT structure * * Signal any pending invalidation fences, should be called during a GT reset */ @@ -244,7 +244,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, /** * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC - * @gt: graphics tile + * @gt: GT structure * @fence: invalidation fence which will be signal on TLB invalidation * completion * @@ -277,7 +277,7 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt, /** * xe_gt_tlb_invalidation_ggtt - Issue a TLB invalidation on this GT for the GGTT - * @gt: graphics tile + * @gt: GT structure * * Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is * synchronous. 
@@ -326,7 +326,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an * address range * - * @gt: graphics tile + * @gt: GT structure * @fence: invalidation fence which will be signal on TLB invalidation * completion * @start: start address @@ -412,7 +412,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, /** * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA - * @gt: graphics tile + * @gt: GT structure * @fence: invalidation fence which will be signal on TLB invalidation * completion, can be NULL * @vma: VMA to invalidate diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index 00b1c6c01e8d..672acfcdf0d7 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -14,7 +14,8 @@ struct xe_gt; struct xe_guc; struct xe_vma; -int xe_gt_tlb_invalidation_init(struct xe_gt *gt); +int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt); + void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index df2042db7ee6..516c81e3b8dd 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -129,7 +129,8 @@ static void load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) { struct xe_device *xe = gt_to_xe(gt); - u32 fuse3 = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3); + struct xe_mmio *mmio = &gt->mmio; + u32 fuse3 = xe_mmio_read32(mmio, MIRROR_FUSE3); /* * PTL platforms with media version 30.00 do not provide proper values @@ -143,7 +144,16 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) if (XE_WA(gt, no_media_l3)) return; - if (GRAPHICS_VER(xe) >= 20) { + if (GRAPHICS_VER(xe) >= 30) { + xe_l3_bank_mask_t per_node = {}; + u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3); + u32 mirror_l3bank_enable = xe_mmio_read32(mmio, MIRROR_L3BANK_ENABLE); + u32 bank_val = REG_FIELD_GET(XE3_L3BANK_ENABLE, mirror_l3bank_enable); + + bitmap_from_arr32(per_node, &bank_val, 32); + gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 32, + meml3_en); + } else if (GRAPHICS_VER(xe) >= 20) { xe_l3_bank_mask_t per_node = {}; u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3); u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3); @@ -155,7 +165,7 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) xe_l3_bank_mask_t per_node = {}; xe_l3_bank_mask_t per_mask_bit = {}; u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3); - u32 fuse4 = xe_mmio_read32(&gt->mmio, XEHP_FUSE4); + u32 fuse4 = xe_mmio_read32(mmio, XEHP_FUSE4); u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4); bitmap_set_value8(per_mask_bit, 0x3, 0); diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 4e2868efb620..1619c0a52db9 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -23,6 +23,7 @@ #include "xe_gt_sriov_vf.h" #include "xe_gt_throttle.h" #include "xe_guc_ads.h" +#include "xe_guc_buf.h" #include "xe_guc_capture.h" #include "xe_guc_ct.h" #include "xe_guc_db_mgr.h" @@ -147,6 +148,34 @@ static u32 guc_ctl_ads_flags(struct xe_guc *guc) return flags; } +static bool needs_wa_dual_queue(struct xe_gt *gt) +{ + /* + * The DUAL_QUEUE_WA tells the GuC to not allow concurrent submissions + * on RCS and CCSes with different address 
spaces, which on DG2 is + * required as a WA for an HW bug. + */ + if (XE_WA(gt, 22011391025)) + return true; + + /* + * On newer platforms, the HW has been updated to not allow parallel + * execution of different address spaces, so the RCS/CCS will stall the + * context switch if one of the other RCS/CCSes is busy with a different + * address space. While functionally correct, having a submission + * stalled on the HW limits the GuC's ability to shuffle things around and + * can cause complications if the non-stalled submission runs for a long + * time, because the GuC doesn't know that the stalled submission isn't + * actually running and might declare it as hung. Therefore, we enable + * the DUAL_QUEUE_WA on all newer platforms on GTs that have CCS engines + * to move management back to the GuC. + */ + if (CCS_MASK(gt) && GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) + return true; + + return false; +} + static u32 guc_ctl_wa_flags(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); @@ -159,7 +188,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) if (XE_WA(gt, 14014475959)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; - if (XE_WA(gt, 22011391025)) + if (needs_wa_dual_queue(gt)) flags |= GUC_WA_DUAL_QUEUE; /* @@ -715,6 +744,10 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc) if (ret) return ret; + ret = xe_guc_buf_cache_init(&guc->buf); + if (ret) + return ret; + return xe_guc_ads_init_post_hwconfig(&guc->ads); } diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index b0afb89d9d90..fab259adc380 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -29,6 +29,7 @@ #include "xe_platform_types.h" #include "xe_uc_fw.h" #include "xe_wa.h" +#include "xe_gt_mcr.h" /* Slack of a few additional entries per engine */ #define ADS_REGSET_EXTRA_MAX 8 @@ -243,8 +244,6 @@ static size_t calculate_regset_size(struct xe_gt *gt) xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry) count++; - count += RING_MAX_NONPRIV_SLOTS * XE_NUM_HW_ENGINES; - count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES; if (XE_WA(gt, 1607983814)) @@ -698,6 +697,20 @@ static void guc_mmio_regset_write_one(struct xe_guc_ads *ads, .flags = reg.masked ? 
GUC_REGSET_MASKED : 0, }; + if (reg.mcr) { + struct xe_reg_mcr mcr_reg = XE_REG_MCR(reg.addr); + u8 group, instance; + + bool steer = xe_gt_mcr_get_nonterminated_steering(ads_to_gt(ads), mcr_reg, + &group, &instance); + + if (steer) { + entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, group); + entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, instance); + entry.flags |= GUC_REGSET_STEERING_NEEDED; + } + } + xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry), &entry, sizeof(entry)); } @@ -729,11 +742,6 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, xa_for_each(&hwe->reg_sr.xa, idx, entry) guc_mmio_regset_write_one(ads, regset_map, entry->reg, count++); - for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) - guc_mmio_regset_write_one(ads, regset_map, - RING_FORCE_TO_NONPRIV(hwe->mmio_base, i), - count++); - for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) { if (e->skip) continue; diff --git a/drivers/gpu/drm/xe/xe_guc_buf.c b/drivers/gpu/drm/xe/xe_guc_buf.c new file mode 100644 index 000000000000..0193c94dd6a0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_buf.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include +#include + +#include "xe_assert.h" +#include "xe_bo.h" +#include "xe_gt_printk.h" +#include "xe_guc.h" +#include "xe_guc_buf.h" +#include "xe_sa.h" + +static struct xe_guc *cache_to_guc(struct xe_guc_buf_cache *cache) +{ + return container_of(cache, struct xe_guc, buf); +} + +static struct xe_gt *cache_to_gt(struct xe_guc_buf_cache *cache) +{ + return guc_to_gt(cache_to_guc(cache)); } + +/** + * xe_guc_buf_cache_init() - Initialize the GuC Buffer Cache. + * @cache: the &xe_guc_buf_cache to initialize + * + * The Buffer Cache allows obtaining a reusable buffer that can be used to pass + * indirect H2G data to GuC without the need to create an ad-hoc allocation. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache) +{ + struct xe_gt *gt = cache_to_gt(cache); + struct xe_sa_manager *sam; + + /* XXX: currently it's useful only for the PF actions */ + if (!IS_SRIOV_PF(gt_to_xe(gt))) + return 0; + + sam = __xe_sa_bo_manager_init(gt_to_tile(gt), SZ_8K, 0, sizeof(u32)); + if (IS_ERR(sam)) + return PTR_ERR(sam); + cache->sam = sam; + + xe_gt_dbg(gt, "reusable buffer with %u dwords at %#x for %ps\n", + xe_guc_buf_cache_dwords(cache), xe_bo_ggtt_addr(sam->bo), + __builtin_return_address(0)); + return 0; +} + +/** + * xe_guc_buf_cache_dwords() - Number of dwords the GuC Buffer Cache supports. + * @cache: the &xe_guc_buf_cache to query + * + * Return: a size of the largest reusable buffer (in dwords) + */ +u32 xe_guc_buf_cache_dwords(struct xe_guc_buf_cache *cache) +{ + return cache->sam ? cache->sam->base.size / sizeof(u32) : 0; } + +/** + * xe_guc_buf_reserve() - Reserve a new sub-allocation. + * @cache: the &xe_guc_buf_cache where to reserve the sub-allocation + * @dwords: the requested size of the buffer in dwords + * + * Use xe_guc_buf_is_valid() to check if the returned buffer reference is valid. + * Must use xe_guc_buf_release() to release a sub-allocation. + * + * Return: a &xe_guc_buf of the new sub-allocation. 
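+ *
+ * A sketch of scope-based usage via the CLASS() helpers declared in
+ * xe_guc_buf.h, where the buffer is released automatically at scope exit
+ * (hypothetical caller, error handling elided; this mirrors how
+ * pf_push_full_vf_config() uses it):
+ *
+ *    CLASS(xe_guc_buf, buf)(&guc->buf, dwords);
+ *    if (!xe_guc_buf_is_valid(buf))
+ *        return -ENOBUFS;
+ *    cfg = xe_guc_buf_cpu_ptr(buf);
+ *    ... fill cfg, then pass xe_guc_buf_flush(buf) as the H2G data address ...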
+ */ +struct xe_guc_buf xe_guc_buf_reserve(struct xe_guc_buf_cache *cache, u32 dwords) +{ + struct drm_suballoc *sa; + + if (cache->sam) + sa = __xe_sa_bo_new(cache->sam, dwords * sizeof(u32), GFP_ATOMIC); + else + sa = ERR_PTR(-EOPNOTSUPP); + + return (struct xe_guc_buf){ .sa = sa }; } + +/** + * xe_guc_buf_from_data() - Reserve a new sub-allocation using data. + * @cache: the &xe_guc_buf_cache where to reserve the sub-allocation + * @data: the data to copy into the sub-allocation + * @size: the size of the data + * + * Similar to xe_guc_buf_reserve() but also copies @data into the sub-allocation. + * + * Return: a &xe_guc_buf of the new sub-allocation. + */ +struct xe_guc_buf xe_guc_buf_from_data(struct xe_guc_buf_cache *cache, + const void *data, size_t size) +{ + struct drm_suballoc *sa; + + sa = __xe_sa_bo_new(cache->sam, size, GFP_ATOMIC); + if (!IS_ERR(sa)) + memcpy(xe_sa_bo_cpu_addr(sa), data, size); + + return (struct xe_guc_buf){ .sa = sa }; } + +/** + * xe_guc_buf_release() - Release a sub-allocation. + * @buf: the &xe_guc_buf to release + * + * Releases a sub-allocation reserved by xe_guc_buf_reserve(). + */ +void xe_guc_buf_release(const struct xe_guc_buf buf) +{ + if (xe_guc_buf_is_valid(buf)) + xe_sa_bo_free(buf.sa, NULL); } + +/** + * xe_guc_buf_flush() - Copy the data from the sub-allocation to the GPU memory. + * @buf: the &xe_guc_buf to flush + * + * Return: a GPU address of the sub-allocation. + */ +u64 xe_guc_buf_flush(const struct xe_guc_buf buf) +{ + xe_sa_bo_flush_write(buf.sa); + return xe_sa_bo_gpu_addr(buf.sa); } + +/** + * xe_guc_buf_cpu_ptr() - Obtain a CPU pointer to the sub-allocation. + * @buf: the &xe_guc_buf to query + * + * Return: a CPU pointer of the sub-allocation. + */ +void *xe_guc_buf_cpu_ptr(const struct xe_guc_buf buf) +{ + return xe_sa_bo_cpu_addr(buf.sa); } + +/** + * xe_guc_buf_gpu_addr() - Obtain a GPU address of the sub-allocation. + * @buf: the &xe_guc_buf to query + * + * Return: a GPU address of the sub-allocation. + */ +u64 xe_guc_buf_gpu_addr(const struct xe_guc_buf buf) +{ + return xe_sa_bo_gpu_addr(buf.sa); } + +/** + * xe_guc_cache_gpu_addr_from_ptr() - Lookup a GPU address using the pointer. + * @cache: the &xe_guc_buf_cache with sub-allocations + * @ptr: the CPU pointer of the sub-allocation + * @size: the size of the data + * + * Return: a GPU address on success or 0 if the pointer was unrelated. 
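+ *
+ * For example, a CPU pointer previously obtained from xe_guc_buf_cpu_ptr()
+ * on a sub-allocation of @cache resolves to the GGTT address of that
+ * sub-allocation, while any pointer outside the cache's backing store
+ * yields 0.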
+ */ +u64 xe_guc_cache_gpu_addr_from_ptr(struct xe_guc_buf_cache *cache, const void *ptr, u32 size) +{ + ptrdiff_t offset = ptr - cache->sam->cpu_ptr; + + if (offset < 0 || offset + size > cache->sam->base.size) + return 0; + + return cache->sam->gpu_addr + offset; } + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_guc_buf_kunit.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_buf.h b/drivers/gpu/drm/xe/xe_guc_buf.h new file mode 100644 index 000000000000..0d67604d96bd --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_buf.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GUC_BUF_H_ +#define _XE_GUC_BUF_H_ + +#include <linux/cleanup.h> +#include <linux/err.h> + +#include "xe_guc_buf_types.h" + +int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache); +u32 xe_guc_buf_cache_dwords(struct xe_guc_buf_cache *cache); +struct xe_guc_buf xe_guc_buf_reserve(struct xe_guc_buf_cache *cache, u32 dwords); +struct xe_guc_buf xe_guc_buf_from_data(struct xe_guc_buf_cache *cache, + const void *data, size_t size); +void xe_guc_buf_release(const struct xe_guc_buf buf); + +/** + * xe_guc_buf_is_valid() - Check if a buffer reference is valid. + * @buf: the &xe_guc_buf reference to check + * + * Return: true if @buf represents a valid sub-allocation. + */ +static inline bool xe_guc_buf_is_valid(const struct xe_guc_buf buf) +{ + return !IS_ERR_OR_NULL(buf.sa); } + +void *xe_guc_buf_cpu_ptr(const struct xe_guc_buf buf); +u64 xe_guc_buf_flush(const struct xe_guc_buf buf); +u64 xe_guc_buf_gpu_addr(const struct xe_guc_buf buf); +u64 xe_guc_cache_gpu_addr_from_ptr(struct xe_guc_buf_cache *cache, const void *ptr, u32 size); + +DEFINE_CLASS(xe_guc_buf, struct xe_guc_buf, + xe_guc_buf_release(_T), + xe_guc_buf_reserve(cache, num), + struct xe_guc_buf_cache *cache, u32 num); + +DEFINE_CLASS(xe_guc_buf_from_data, struct xe_guc_buf, + xe_guc_buf_release(_T), + xe_guc_buf_from_data(cache, data, size), + struct xe_guc_buf_cache *cache, const void *data, size_t size); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_buf_types.h b/drivers/gpu/drm/xe/xe_guc_buf_types.h new file mode 100644 index 000000000000..9e123d71c064 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_buf_types.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GUC_BUF_TYPES_H_ +#define _XE_GUC_BUF_TYPES_H_ + +struct drm_suballoc; +struct xe_sa_manager; + +/** + * struct xe_guc_buf_cache - GuC Data Buffer Cache. + */ +struct xe_guc_buf_cache { + /* private: internal sub-allocation manager */ + struct xe_sa_manager *sam; +}; + +/** + * struct xe_guc_buf - GuC Data Buffer Reference. 
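+ *
+ * The reference is intentionally small and is passed around by value;
+ * use xe_guc_buf_is_valid() to tell a successful reservation apart from
+ * a failed one.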
+ */ +struct xe_guc_buf { + /* private: internal sub-allocation reference */ + struct drm_suballoc *sa; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index 137571fae4ed..f6d523e4c5fe 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -1955,7 +1955,7 @@ xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q) } /* - * xe_guc_capture_put_matched_nodes - Cleanup macthed nodes + * xe_guc_capture_put_matched_nodes - Cleanup matched nodes * @guc: The GuC object * * Free matched node and all nodes with the equal guc_id from diff --git a/drivers/gpu/drm/xe/xe_guc_capture_types.h b/drivers/gpu/drm/xe/xe_guc_capture_types.h index 2057125b1bfa..ca2d390ccbee 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture_types.h +++ b/drivers/gpu/drm/xe/xe_guc_capture_types.h @@ -22,7 +22,7 @@ enum capture_register_data_type { * struct __guc_mmio_reg_descr - GuC mmio register descriptor * * xe_guc_capture module uses these structures to define a register - * (offsets, names, flags,...) that are used at the ADS regisration + * (offsets, names, flags,...) that are used at the ADS registration * time as well as during runtime processing and reporting of error- * capture states generated by GuC just prior to engine reset events. */ @@ -48,7 +48,7 @@ struct __guc_mmio_reg_descr { * * xe_guc_capture module uses these structures to maintain static * tables (per unique platform) that consists of lists of registers - * (offsets, names, flags,...) that are used at the ADS regisration + * (offsets, names, flags,...) that are used at the ADS registration * time as well as during runtime processing and reporting of error- * capture states generated by GuC just prior to engine reset events. */ diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 7d33f3a11e61..72ad576fc18e 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -710,7 +710,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, --len; ++action; - /* Write H2G ensuring visable before descriptor update */ + /* Write H2G ensuring visible before descriptor update */ xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32)); xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32)); xe_device_wmb(xe); @@ -1383,7 +1383,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) * this function and nowhere else. Hence, they cannot be different * unless two g2h_read calls are running concurrently. Which is not * possible because it is guarded by ct->fast_lock. And yet, some - * discrete platforms are reguarly hitting this error :(. + * discrete platforms are regularly hitting this error :(. 
* * desc_head rolling backwards shouldn't cause any noticeable * problems - just a delay in GuC being allowed to proceed past that @@ -1723,8 +1723,11 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, drm_printf(p, "\tg2h outstanding: %d\n", snapshot->g2h_outstanding); - if (snapshot->ctb) - xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size); + if (snapshot->ctb) { + drm_printf(p, "[CTB].length: 0x%zx\n", snapshot->ctb_size); + xe_print_blob_ascii85(p, "[CTB].data", '\n', + snapshot->ctb, 0, snapshot->ctb_size); + } } else { drm_puts(p, "CT disabled\n"); } diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c index 995b306aced7..0aff1d462bc0 100644 --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -13,6 +13,7 @@ #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_guc_log.h" +#include "xe_guc_pc.h" #include "xe_macros.h" #include "xe_pm.h" @@ -60,10 +61,24 @@ static int guc_ctb(struct seq_file *m, void *data) return 0; } +static int guc_pc(struct seq_file *m, void *data) +{ + struct xe_guc *guc = node_to_guc(m->private); + struct xe_device *xe = guc_to_xe(guc); + struct drm_printer p = drm_seq_file_printer(m); + + xe_pm_runtime_get(xe); + xe_guc_pc_print(&guc->pc, &p); + xe_pm_runtime_put(xe); + + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"guc_info", guc_info, 0}, {"guc_log", guc_log, 0}, {"guc_ctb", guc_ctb, 0}, + {"guc_pc", guc_pc, 0}, }; void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent) diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index df4cfb698cdb..80514a446ba2 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -149,16 +149,12 @@ struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log, size_t remain; int i; - if (!log->bo) { - xe_gt_err(gt, "GuC log buffer not allocated\n"); + if (!log->bo) return NULL; - } snapshot = xe_guc_log_snapshot_alloc(log, atomic); - if (!snapshot) { - xe_gt_err(gt, "GuC log snapshot not allocated\n"); + if (!snapshot) return NULL; - } remain = snapshot->size; for (i = 0; i < snapshot->num_chunks; i++) { @@ -208,11 +204,14 @@ void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_ drm_printf(p, "GuC timestamp: 0x%08llX [%llu]\n", snapshot->stamp, snapshot->stamp); drm_printf(p, "Log level: %u\n", snapshot->level); + drm_printf(p, "[LOG].length: 0x%zx\n", snapshot->size); remain = snapshot->size; for (i = 0; i < snapshot->num_chunks; i++) { size_t size = min(GUC_LOG_CHUNK_SIZE, remain); + const char *prefix = i ? NULL : "[LOG].data"; + char suffix = i == snapshot->num_chunks - 1 ? '\n' : 0; - xe_print_blob_ascii85(p, i ? 
NULL : "Log data", snapshot->copy[i], 0, size); + xe_print_blob_ascii85(p, prefix, suffix, snapshot->copy[i], 0, size); remain -= size; } } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index e8b9faeaef64..43f9617baba2 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -8,6 +8,7 @@ #include #include +#include #include #include "abi/guc_actions_slpc_abi.h" @@ -38,6 +39,7 @@ #define FREQ_INFO_REC XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5ef0) #define RPE_MASK REG_GENMASK(15, 8) +#define RPA_MASK REG_GENMASK(31, 16) #define GT_PERF_STATUS XE_REG(0x1381b4) #define CAGF_MASK REG_GENMASK(19, 11) @@ -328,6 +330,19 @@ static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) freq); } +static void mtl_update_rpa_value(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; + + if (xe_gt_is_media_type(gt)) + reg = xe_mmio_read32(>->mmio, MTL_MPA_FREQUENCY); + else + reg = xe_mmio_read32(>->mmio, MTL_GT_RPA_FREQUENCY); + + pc->rpa_freq = decode_freq(REG_FIELD_GET(MTL_RPA_MASK, reg)); +} + static void mtl_update_rpe_value(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); @@ -341,6 +356,25 @@ static void mtl_update_rpe_value(struct xe_guc_pc *pc) pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg)); } +static void tgl_update_rpa_value(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + u32 reg; + + /* + * For PVC we still need to use fused RP1 as the approximation for RPe + * For other platforms than PVC we get the resolved RPe directly from + * PCODE at a different register + */ + if (xe->info.platform == XE_PVC) + reg = xe_mmio_read32(>->mmio, PVC_RP_STATE_CAP); + else + reg = xe_mmio_read32(>->mmio, FREQ_INFO_REC); + + pc->rpa_freq = REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER; +} + static void tgl_update_rpe_value(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); @@ -365,10 +399,13 @@ static void pc_update_rp_values(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); struct xe_device *xe = gt_to_xe(gt); - if (GRAPHICS_VERx100(xe) >= 1270) + if (GRAPHICS_VERx100(xe) >= 1270) { + mtl_update_rpa_value(pc); mtl_update_rpe_value(pc); - else + } else { + tgl_update_rpa_value(pc); tgl_update_rpe_value(pc); + } /* * RPe is decided at runtime by PCODE. In the rare case where that's @@ -421,8 +458,8 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq) * GuC SLPC plays with cur freq request when GuCRC is enabled * Block RC6 for a more reliable read. */ - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) { xe_force_wake_put(gt_to_fw(gt), fw_ref); return -ETIMEDOUT; } @@ -447,6 +484,19 @@ u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc) return pc->rp0_freq; } +/** + * xe_guc_pc_get_rpa_freq - Get the RPa freq + * @pc: The GuC PC + * + * Returns: RPa freq. 
+ */ +u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc) +{ + pc_update_rp_values(pc); + + return pc->rpa_freq; +} + /** * xe_guc_pc_get_rpe_freq - Get the RPe freq * @pc: The GuC PC @@ -481,10 +531,10 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) */ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) { - struct xe_gt *gt = pc_to_gt(pc); - unsigned int fw_ref; int ret; + xe_device_assert_mem_access(pc_to_xe(pc)); + mutex_lock(&pc->freq_lock); if (!pc->freq_ready) { /* Might be in the middle of a gt reset */ @@ -492,24 +542,12 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) goto out; } - /* - * GuC SLPC plays with min freq request when GuCRC is enabled - * Block RC6 for a more reliable read. - */ - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { - ret = -ETIMEDOUT; - goto fw; - } - ret = pc_action_query_task_state(pc); if (ret) - goto fw; + goto out; *freq = pc_get_min_freq(pc); -fw: - xe_force_wake_put(gt_to_fw(gt), fw_ref); out: mutex_unlock(&pc->freq_lock); return ret; @@ -969,8 +1007,8 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) xe_gt_assert(gt, xe_device_uc_enabled(xe)); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) { xe_force_wake_put(gt_to_fw(gt), fw_ref); return -ETIMEDOUT; } @@ -1094,3 +1132,61 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) return devm_add_action_or_reset(xe->drm.dev, xe_guc_pc_fini_hw, pc); } + +static const char *pc_get_state_string(struct xe_guc_pc *pc) +{ + switch (slpc_shared_data_read(pc, header.global_state)) { + case SLPC_GLOBAL_STATE_NOT_RUNNING: + return "not running"; + case SLPC_GLOBAL_STATE_INITIALIZING: + return "initializing"; + case SLPC_GLOBAL_STATE_RESETTING: + return "resetting"; + case SLPC_GLOBAL_STATE_RUNNING: + return "running"; + case SLPC_GLOBAL_STATE_SHUTTING_DOWN: + return "shutting down"; + case SLPC_GLOBAL_STATE_ERROR: + return "error"; + default: + return "unknown"; + } +} + +/** + * xe_guc_pc_print - Print GuC's Power Conservation information for debug + * @pc: Xe_GuC_PC instance + * @p: drm_printer + */ +void xe_guc_pc_print(struct xe_guc_pc *pc, struct drm_printer *p) +{ + drm_printf(p, "SLPC Shared Data Header:\n"); + drm_printf(p, "\tSize: %x\n", slpc_shared_data_read(pc, header.size)); + drm_printf(p, "\tGlobal State: %s\n", pc_get_state_string(pc)); + + if (pc_action_query_task_state(pc)) + return; + + drm_printf(p, "\nSLPC Tasks Status:\n"); + drm_printf(p, "\tGTPERF enabled: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_GTPERF_TASK_ENABLED)); + drm_printf(p, "\tDCC enabled: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_DCC_TASK_ENABLED)); + drm_printf(p, "\tDCC in use: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_IN_DCC)); + drm_printf(p, "\tBalancer enabled: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_BALANCER_ENABLED)); + drm_printf(p, "\tIBC enabled: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_IBC_TASK_ENABLED)); + drm_printf(p, "\tBalancer IA LMT enabled: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_BALANCER_IA_LMT_ENABLED)); + drm_printf(p, "\tBalancer IA LMT active: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) 
& + SLPC_BALANCER_IA_LMT_ACTIVE)); +} diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index efda432fadfc..39102b79602f 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -10,6 +10,7 @@ struct xe_guc_pc; enum slpc_gucrc_mode; +struct drm_printer; int xe_guc_pc_init(struct xe_guc_pc *pc); int xe_guc_pc_start(struct xe_guc_pc *pc); @@ -17,10 +18,12 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc); int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc); int xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mode); int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc); +void xe_guc_pc_print(struct xe_guc_pc *pc, struct drm_printer *p); u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc); int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq); u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc); +u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc); u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc); u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc); int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq); diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 13810be015db..2978ac9a249b 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -17,6 +17,8 @@ struct xe_guc_pc { struct xe_bo *bo; /** @rp0_freq: HW RP0 frequency - The Maximum one */ u32 rp0_freq; + /** @rpa_freq: HW RPa frequency - The Achievable one */ + u32 rpa_freq; /** @rpe_freq: HW RPe frequency - The Efficient one */ u32 rpe_freq; /** @rpn_freq: HW RPN frequency - The Minimum one */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 9c36329fe857..913c74d6e2ae 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1226,7 +1226,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) enable_scheduling(q); rearm: /* - * XXX: Ideally want to adjust timeout based on current exection time + * XXX: Ideally want to adjust timeout based on current execution time * but there is not currently an easy way to do in DRM scheduler. With * some thought, do this in a follow up. 
*/ diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 83a41ebcdc91..573aa6308380 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -11,6 +11,7 @@ #include "regs/xe_reg_defs.h" #include "xe_guc_ads_types.h" +#include "xe_guc_buf_types.h" #include "xe_guc_ct_types.h" #include "xe_guc_fwif.h" #include "xe_guc_log_types.h" @@ -58,6 +59,8 @@ struct xe_guc { struct xe_guc_ads ads; /** @ct: GuC ct */ struct xe_guc_ct ct; + /** @buf: GuC Buffer Cache manager */ + struct xe_guc_buf_cache buf; /** @capture: the error-state-capture module's data and objects */ struct xe_guc_state_capture *capture; /** @pc: GuC Power Conservation */ diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c index d765bfd3636b..06dc78d3a812 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.c +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -12,6 +12,7 @@ #include "xe_drv.h" #include "xe_heci_gsc.h" #include "xe_platform_types.h" +#include "xe_survivability_mode.h" #define GSC_BAR_LENGTH 0x00000FFC @@ -200,7 +201,7 @@ void xe_heci_gsc_init(struct xe_device *xe) return; } - if (!def->use_polling) { + if (!def->use_polling && !xe_survivability_mode_enabled(xe)) { ret = heci_gsc_irq_setup(xe); if (ret) goto fail; diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index 2c32dc46f7d4..089834467880 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -159,7 +159,7 @@ void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma) * This function allocates the storage of the userptr sg table. * It is caller's responsibility to free it calling sg_free_table. * - * returns: 0 for succuss; negative error no on failure + * returns: 0 for success; negative errno on failure */ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index c4b0dc3be39c..fc447751fe78 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -324,6 +324,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) { u32 ccs_mask = xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE); + u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE); if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask) xe_mmio_write32(&hwe->gt->mmio, RCU_MODE, @@ -332,8 +333,10 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), xe_bo_ggtt_addr(hwe->hwsp)); + + if (xe_device_has_msix(gt_to_xe(hwe->gt))) + ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE); + xe_hw_engine_mmio_write32(hwe, RING_MODE(0), ring_mode); - xe_hw_engine_mmio_write32(hwe, RING_MODE(0), - _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0), _MASKED_BIT_DISABLE(STOP_RING)); xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); @@ -419,7 +422,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) * Bspec: 72161 */ const u8 mocs_write_idx = gt->mocs.uc_index; - const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && + const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && IS_DGFX(xe) && (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ? 
gt->mocs.wb_index : gt->mocs.uc_index; u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) | @@ -574,7 +577,6 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, xe_gt_assert(gt, gt->info.engine_mask & BIT(id)); xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); - xe_reg_sr_apply_whitelist(hwe); hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K, XE_BO_FLAG_VRAM_IF_DGFX(tile) | @@ -773,7 +775,7 @@ static void check_gsc_availability(struct xe_gt *gt) xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_ENABLE, 0); xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_MASK, ~0); - drm_info(&xe->drm, "gsccs disabled due to lack of FW\n"); + drm_dbg(&xe->drm, "GSC FW not used, disabling gsccs\n"); } } diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index e14bee95e364..e4191a7a2c31 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -106,7 +106,7 @@ struct xe_hw_engine_class_intf { * Contains all the hardware engine state for physical instances. */ struct xe_hw_engine { - /** @gt: graphics tile this hw engine belongs to */ + /** @gt: GT structure this hw engine belongs to */ struct xe_gt *gt; /** @name: name of this hw engine */ const char *name; diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h index 364a61f4bfda..58a8d09afe5c 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence_types.h +++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h @@ -41,7 +41,7 @@ struct xe_hw_fence_irq { * to a xe_hw_fence_irq, maintains serial seqno. */ struct xe_hw_fence_ctx { - /** @gt: graphics tile of hardware fence context */ + /** @gt: GT structure of hardware fence context */ struct xe_gt *gt; /** @irq: fence irq handler */ struct xe_hw_fence_irq *irq; diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 7bf7201529ac..32f5a67a917b 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -10,6 +10,7 @@ #include #include "display/xe_display.h" +#include "regs/xe_guc_regs.h" #include "regs/xe_irq_regs.h" #include "xe_device.h" #include "xe_drv.h" @@ -29,6 +30,11 @@ #define IIR(offset) XE_REG(offset + 0x8) #define IER(offset) XE_REG(offset + 0xc) +static int xe_irq_msix_init(struct xe_device *xe); +static void xe_irq_msix_free(struct xe_device *xe); +static int xe_irq_msix_request_irqs(struct xe_device *xe); +static void xe_irq_msix_synchronize_irq(struct xe_device *xe); + static void assert_iir_is_zero(struct xe_mmio *mmio, struct xe_reg reg) { u32 val = xe_mmio_read32(mmio, reg); @@ -348,12 +354,8 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg) unsigned long intr_dw[2]; u32 identity[32]; - spin_lock(&xe->irq.lock); - if (!xe->irq.enabled) { - spin_unlock(&xe->irq.lock); + if (!atomic_read(&xe->irq.enabled)) return IRQ_NONE; - } - spin_unlock(&xe->irq.lock); master_ctl = xelp_intr_disable(xe); if (!master_ctl) { @@ -417,12 +419,8 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) /* TODO: This really shouldn't be copied+pasted */ - spin_lock(&xe->irq.lock); - if (!xe->irq.enabled) { - spin_unlock(&xe->irq.lock); + if (!atomic_read(&xe->irq.enabled)) return IRQ_NONE; - } - spin_unlock(&xe->irq.lock); master_tile_ctl = dg1_intr_disable(xe); if (!master_tile_ctl) { @@ -580,6 +578,11 @@ static void xe_irq_reset(struct xe_device *xe) if (IS_SRIOV_VF(xe)) return vf_irq_reset(xe); + if (xe_device_uses_memirq(xe)) { + for_each_tile(tile, xe, id) + xe_memirq_reset(&tile->memirq); + } + for_each_tile(tile, xe, id) {
if (GRAPHICS_VERx100(xe) >= 1210) dg1_irq_reset(tile); @@ -622,6 +625,14 @@ static void xe_irq_postinstall(struct xe_device *xe) if (IS_SRIOV_VF(xe)) return vf_irq_postinstall(xe); + if (xe_device_uses_memirq(xe)) { + struct xe_tile *tile; + unsigned int id; + + for_each_tile(tile, xe, id) + xe_memirq_postinstall(&tile->memirq); + } + xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe)); /* @@ -644,12 +655,8 @@ static irqreturn_t vf_mem_irq_handler(int irq, void *arg) struct xe_tile *tile; unsigned int id; - spin_lock(&xe->irq.lock); - if (!xe->irq.enabled) { - spin_unlock(&xe->irq.lock); + if (!atomic_read(&xe->irq.enabled)) return IRQ_NONE; - } - spin_unlock(&xe->irq.lock); for_each_tile(tile, xe, id) xe_memirq_handler(&tile->memirq); @@ -668,63 +675,85 @@ static irq_handler_t xe_irq_handler(struct xe_device *xe) return xelp_irq_handler; } -static void irq_uninstall(void *arg) +static int xe_irq_msi_request_irqs(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + irq_handler_t irq_handler; + int irq, err; + + irq_handler = xe_irq_handler(xe); + if (!irq_handler) { + drm_err(&xe->drm, "No supported interrupt handler"); + return -EINVAL; + } + + irq = pci_irq_vector(pdev, 0); + err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe); + if (err < 0) { + drm_err(&xe->drm, "Failed to request MSI IRQ %d\n", err); + return err; + } + + return 0; +} + +static void xe_irq_msi_free(struct xe_device *xe) { - struct xe_device *xe = arg; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); int irq; - if (!xe->irq.enabled) + irq = pci_irq_vector(pdev, 0); + free_irq(irq, xe); +} + +static void irq_uninstall(void *arg) +{ + struct xe_device *xe = arg; + + if (!atomic_xchg(&xe->irq.enabled, 0)) return; - xe->irq.enabled = false; xe_irq_reset(xe); - irq = pci_irq_vector(pdev, 0); - free_irq(irq, xe); + if (xe_device_has_msix(xe)) + xe_irq_msix_free(xe); + else + xe_irq_msi_free(xe); +} + +int xe_irq_init(struct xe_device *xe) +{ + spin_lock_init(&xe->irq.lock); + + return xe_irq_msix_init(xe); } int xe_irq_install(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - unsigned int irq_flags = PCI_IRQ_MSIX; - irq_handler_t irq_handler; - int err, irq, nvec; - - irq_handler = xe_irq_handler(xe); - if (!irq_handler) { - drm_err(&xe->drm, "No supported interrupt handler"); - return -EINVAL; - } + unsigned int irq_flags = PCI_IRQ_MSI; + int nvec = 1; + int err; xe_irq_reset(xe); - nvec = pci_msix_vec_count(pdev); - if (nvec <= 0) { - if (nvec == -EINVAL) { - /* MSIX capability is not supported in the device, using MSI */ - irq_flags = PCI_IRQ_MSI; - nvec = 1; - } else { - drm_err(&xe->drm, "MSIX: Failed getting count\n"); - return nvec; - } + if (xe_device_has_msix(xe)) { + nvec = xe->irq.msix.nvec; + irq_flags = PCI_IRQ_MSIX; } err = pci_alloc_irq_vectors(pdev, nvec, nvec, irq_flags); if (err < 0) { - drm_err(&xe->drm, "MSI/MSIX: Failed to enable support %d\n", err); + drm_err(&xe->drm, "Failed to allocate IRQ vectors: %d\n", err); return err; } - irq = pci_irq_vector(pdev, 0); - err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe); - if (err < 0) { - drm_err(&xe->drm, "Failed to request MSI/MSIX IRQ %d\n", err); + err = xe_device_has_msix(xe) ? 
xe_irq_msix_request_irqs(xe) : + xe_irq_msi_request_irqs(xe); + if (err) return err; - } - xe->irq.enabled = true; + atomic_set(&xe->irq.enabled, 1); xe_irq_postinstall(xe); @@ -735,20 +764,28 @@ int xe_irq_install(struct xe_device *xe) return 0; free_irq_handler: - free_irq(irq, xe); + if (xe_device_has_msix(xe)) + xe_irq_msix_free(xe); + else + xe_irq_msi_free(xe); return err; } -void xe_irq_suspend(struct xe_device *xe) +static void xe_irq_msi_synchronize_irq(struct xe_device *xe) { - int irq = to_pci_dev(xe->drm.dev)->irq; + synchronize_irq(to_pci_dev(xe->drm.dev)->irq); +} - spin_lock_irq(&xe->irq.lock); - xe->irq.enabled = false; /* no new irqs */ - spin_unlock_irq(&xe->irq.lock); +void xe_irq_suspend(struct xe_device *xe) +{ + atomic_set(&xe->irq.enabled, 0); /* no new irqs */ - synchronize_irq(irq); /* flush irqs */ + /* flush irqs */ + if (xe_device_has_msix(xe)) + xe_irq_msix_synchronize_irq(xe); + else + xe_irq_msi_synchronize_irq(xe); xe_irq_reset(xe); /* turn irqs off */ } @@ -762,10 +799,205 @@ void xe_irq_resume(struct xe_device *xe) * 1. no irq will arrive before the postinstall * 2. display is not yet resumed */ - xe->irq.enabled = true; + atomic_set(&xe->irq.enabled, 1); xe_irq_reset(xe); xe_irq_postinstall(xe); /* turn irqs on */ for_each_gt(gt, xe, id) xe_irq_enable_hwe(gt); } + +/* MSI-X related definitions and functions below. */ + +enum xe_irq_msix_static { + GUC2HOST_MSIX = 0, + DEFAULT_MSIX = XE_IRQ_DEFAULT_MSIX, + /* Must be last */ + NUM_OF_STATIC_MSIX, +}; + +static int xe_irq_msix_init(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int nvec = pci_msix_vec_count(pdev); + + if (nvec == -EINVAL) + return 0; /* MSI */ + + if (nvec < 0) { + drm_err(&xe->drm, "Failed getting MSI-X vectors count: %d\n", nvec); + return nvec; + } + + xe->irq.msix.nvec = nvec; + xa_init_flags(&xe->irq.msix.indexes, XA_FLAGS_ALLOC); + return 0; +} + +static irqreturn_t guc2host_irq_handler(int irq, void *arg) +{ + struct xe_device *xe = arg; + struct xe_tile *tile; + u8 id; + + if (!atomic_read(&xe->irq.enabled)) + return IRQ_NONE; + + for_each_tile(tile, xe, id) + xe_guc_irq_handler(&tile->primary_gt->uc.guc, + GUC_INTR_GUC2HOST); + + return IRQ_HANDLED; +} + +static irqreturn_t xe_irq_msix_default_hwe_handler(int irq, void *arg) +{ + unsigned int tile_id, gt_id; + struct xe_device *xe = arg; + struct xe_memirq *memirq; + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct xe_tile *tile; + struct xe_gt *gt; + + if (!atomic_read(&xe->irq.enabled)) + return IRQ_NONE; + + for_each_tile(tile, xe, tile_id) { + memirq = &tile->memirq; + if (!memirq->bo) + continue; + + for_each_gt(gt, xe, gt_id) { + if (gt->tile != tile) + continue; + + for_each_hw_engine(hwe, gt, id) + xe_memirq_hwe_handler(memirq, hwe); + } + } + + return IRQ_HANDLED; +} + +static int xe_irq_msix_alloc_vector(struct xe_device *xe, void *irq_buf, + bool dynamic_msix, u16 *msix) +{ + struct xa_limit limit; + int ret; + u32 id; + + limit = (dynamic_msix) ? 
XA_LIMIT(NUM_OF_STATIC_MSIX, xe->irq.msix.nvec - 1) : + XA_LIMIT(*msix, *msix); + ret = xa_alloc(&xe->irq.msix.indexes, &id, irq_buf, limit, GFP_KERNEL); + if (ret) + return ret; + + if (dynamic_msix) + *msix = id; + + return 0; +} + +static void xe_irq_msix_release_vector(struct xe_device *xe, u16 msix) +{ + xa_erase(&xe->irq.msix.indexes, msix); +} + +static int xe_irq_msix_request_irq_internal(struct xe_device *xe, irq_handler_t handler, + void *irq_buf, const char *name, u16 msix) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int ret, irq; + + irq = pci_irq_vector(pdev, msix); + if (irq < 0) + return irq; + + ret = request_irq(irq, handler, IRQF_SHARED, name, irq_buf); + if (ret < 0) + return ret; + + return 0; +} + +int xe_irq_msix_request_irq(struct xe_device *xe, irq_handler_t handler, void *irq_buf, + const char *name, bool dynamic_msix, u16 *msix) +{ + int ret; + + ret = xe_irq_msix_alloc_vector(xe, irq_buf, dynamic_msix, msix); + if (ret) + return ret; + + ret = xe_irq_msix_request_irq_internal(xe, handler, irq_buf, name, *msix); + if (ret) { + drm_err(&xe->drm, "Failed to request IRQ for MSI-X %u\n", *msix); + xe_irq_msix_release_vector(xe, *msix); + return ret; + } + + return 0; +} + +void xe_irq_msix_free_irq(struct xe_device *xe, u16 msix) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int irq; + void *irq_buf; + + irq_buf = xa_load(&xe->irq.msix.indexes, msix); + if (!irq_buf) + return; + + irq = pci_irq_vector(pdev, msix); + if (irq < 0) { + drm_err(&xe->drm, "MSI-X %u can't be released, there is no matching IRQ\n", msix); + return; + } + + free_irq(irq, irq_buf); + xe_irq_msix_release_vector(xe, msix); +} + +int xe_irq_msix_request_irqs(struct xe_device *xe) +{ + int err; + u16 msix; + + msix = GUC2HOST_MSIX; + err = xe_irq_msix_request_irq(xe, guc2host_irq_handler, xe, + DRIVER_NAME "-guc2host", false, &msix); + if (err) + return err; + + msix = DEFAULT_MSIX; + err = xe_irq_msix_request_irq(xe, xe_irq_msix_default_hwe_handler, xe, + DRIVER_NAME "-default-msix", false, &msix); + if (err) { + xe_irq_msix_free_irq(xe, GUC2HOST_MSIX); + return err; + } + + return 0; +} + +void xe_irq_msix_free(struct xe_device *xe) +{ + unsigned long msix; + u32 *dummy; + + xa_for_each(&xe->irq.msix.indexes, msix, dummy) + xe_irq_msix_free_irq(xe, msix); + xa_destroy(&xe->irq.msix.indexes); +} + +void xe_irq_msix_synchronize_irq(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + unsigned long msix; + u32 *dummy; + + xa_for_each(&xe->irq.msix.indexes, msix, dummy) + synchronize_irq(pci_irq_vector(pdev, msix)); +} diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h index 067514e13675..a28bd577ba52 100644 --- a/drivers/gpu/drm/xe/xe_irq.h +++ b/drivers/gpu/drm/xe/xe_irq.h @@ -6,13 +6,21 @@ #ifndef _XE_IRQ_H_ #define _XE_IRQ_H_ +#include <linux/interrupt.h> + +#define XE_IRQ_DEFAULT_MSIX 1 + struct xe_device; struct xe_tile; struct xe_gt; +int xe_irq_init(struct xe_device *xe); int xe_irq_install(struct xe_device *xe); void xe_irq_suspend(struct xe_device *xe); void xe_irq_resume(struct xe_device *xe); void xe_irq_enable_hwe(struct xe_gt *gt); +int xe_irq_msix_request_irq(struct xe_device *xe, irq_handler_t handler, void *irq_buf, + const char *name, bool dynamic_msix, u16 *msix); +void xe_irq_msix_free_irq(struct xe_device *xe, u16 msix); #endif
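The pair of exports at the bottom of xe_irq.h is the whole client-facing MSI-X API: with dynamic_msix set, the xarray allocator picks a free vector above the static ones and returns it through *msix. A hedged usage sketch, with a made-up handler and name (only the two xe_irq_msix_* signatures come from the hunks above):

static irqreturn_t example_irq_handler(int irq, void *arg)
{
	/* arg is the irq_buf cookie passed at request time */
	return IRQ_HANDLED;
}

static int example_bind_dynamic_vector(struct xe_device *xe)
{
	u16 msix;	/* written back by the allocator because dynamic_msix is true */
	int err;

	err = xe_irq_msix_request_irq(xe, example_irq_handler, xe,
				      "xe-example", true, &msix);
	if (err)
		return err;

	/* ... interrupt is live; tear down when done ... */
	xe_irq_msix_free_irq(xe, msix);
	return 0;
}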
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 22e58c6e2a35..bbb9ffbf6367 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -584,6 +584,7 @@ static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) { struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq; struct xe_device *xe = gt_to_xe(hwe->gt); + u8 num_regs; if (!xe_device_uses_memirq(xe)) return; @@ -593,12 +594,18 @@ static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr; regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq); - regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | + num_regs = xe_device_has_msix(xe) ? 3 : 2; + regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) | MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED; regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr; regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe); regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr; regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe); + + if (xe_device_has_msix(xe)) { + regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr; + /* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */ + } } static int lrc_ring_mi_mode(struct xe_hw_engine *hwe) @@ -876,7 +883,7 @@ static void xe_lrc_finish(struct xe_lrc *lrc) #define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, - struct xe_vm *vm, u32 ring_size) + struct xe_vm *vm, u32 ring_size, u16 msix_vec) { struct xe_gt *gt = hwe->gt; struct xe_tile *tile = gt_to_tile(gt); @@ -945,6 +952,14 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_drm_client_add_bo(vm->xef->client, lrc->bo); } + if (xe_device_has_msix(xe)) { + xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR, + xe_memirq_status_ptr(&tile->memirq, hwe)); + xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR, + xe_memirq_source_ptr(&tile->memirq, hwe)); + xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec); + } + if (xe_gt_has_indirect_ring_state(gt)) { xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE, __xe_lrc_indirect_ring_ggtt_addr(lrc)); @@ -1005,6 +1020,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, * @hwe: Hardware Engine * @vm: The VM (address space) * @ring_size: LRC ring size + * @msix_vec: MSI-X interrupt vector (for platforms that support it) * * Allocate and initialize the Logical Ring Context (LRC). * @@ -1012,7 +1028,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, * upon failure.
*/ struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, - u32 ring_size) + u32 ring_size, u16 msix_vec) { struct xe_lrc *lrc; int err; @@ -1021,7 +1037,7 @@ struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, if (!lrc) return ERR_PTR(-ENOMEM); - err = xe_lrc_init(lrc, hwe, vm, ring_size); + err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec); if (err) { kfree(lrc); return ERR_PTR(err); diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index b459dcab8787..4206e6a8b50a 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -42,7 +42,7 @@ struct xe_lrc_snapshot { #define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4) struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, - u32 ring_size); + u32 ring_size, u16 msix_vec); void xe_lrc_destroy(struct kref *ref); /** diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 1b97d90aadda..278bc96cf593 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1506,7 +1506,7 @@ __xe_migrate_update_pgtables(struct xe_migrate *m, * using the default engine for the updates, they will be performed in the * order they grab the job_mutex. If different engines are used, external * synchronization is needed for overlapping updates to maintain page-table - * consistency. Note that the meaing of "overlapping" is that the updates + * consistency. Note that the meaning of "overlapping" is that the updates * touch the same page-table, which might be a higher-level page-directory. * If no pipelining is needed, then updates may be performed by the cpu. * diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index a48f239cad1c..d321a21aacf0 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -103,50 +103,11 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) } } -/* - * On top of all the multi-tile MMIO space there can be a platform-dependent - * extension for each tile, resulting in a layout like below: - * - * .----------------------. <- ext_base + tile_count * tile_mmio_ext_size - * | .... | - * |----------------------| <- ext_base + 2 * tile_mmio_ext_size - * | tile1->mmio_ext.regs | - * |----------------------| <- ext_base + 1 * tile_mmio_ext_size - * | tile0->mmio_ext.regs | - * |======================| <- ext_base = tile_count * tile_mmio_size - * | | - * | mmio.regs | - * | | - * '----------------------' <- 0MB - * - * Set up the tile[]->mmio_ext pointers/sizes. 
- */ -static void mmio_extension_setup(struct xe_device *xe, size_t tile_mmio_size, - size_t tile_mmio_ext_size) -{ - struct xe_tile *tile; - void __iomem *regs; - u8 id; - - if (!xe->info.has_mmio_ext) - return; - - regs = xe->mmio.regs + tile_mmio_size * xe->info.tile_count; - for_each_tile(tile, xe, id) { - tile->mmio_ext.regs_size = tile_mmio_ext_size; - tile->mmio_ext.regs = regs; - tile->mmio_ext.tile = tile; - regs += tile_mmio_ext_size; - } -} - int xe_mmio_probe_tiles(struct xe_device *xe) { size_t tile_mmio_size = SZ_16M; - size_t tile_mmio_ext_size = xe->info.tile_mmio_ext_size; mmio_multi_tile_setup(xe, tile_mmio_size); - mmio_extension_setup(xe, tile_mmio_size, tile_mmio_ext_size); return devm_add_action_or_reset(xe->drm.dev, tiles_fini, xe); } diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 0f2c20e9204a..07b27114be9a 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -21,9 +21,6 @@ struct xe_modparam xe_modparam = { .probe_display = true, .guc_log_level = 3, .force_probe = CONFIG_DRM_XE_FORCE_PROBE, -#ifdef CONFIG_PCI_IOV - .max_vfs = IS_ENABLED(CONFIG_DRM_XE_DEBUG) ? ~0 : 0, -#endif .wedged_mode = 1, /* the rest are 0 by default */ }; diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 8dd55798ab31..fa873f3d0a9d 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -16,7 +16,6 @@ #include "instructions/xe_mi_commands.h" #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" -#include "regs/xe_lrc_layout.h" #include "regs/xe_oa_regs.h" #include "xe_assert.h" #include "xe_bb.h" @@ -28,7 +27,6 @@ #include "xe_gt_mcr.h" #include "xe_gt_printk.h" #include "xe_guc_pc.h" -#include "xe_lrc.h" #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" @@ -74,12 +72,6 @@ struct xe_oa_config { struct rcu_head rcu; }; -struct flex { - struct xe_reg reg; - u32 offset; - u32 value; -}; - struct xe_oa_open_param { struct xe_file *xef; u32 oa_unit_id; @@ -96,6 +88,8 @@ struct xe_oa_open_param { struct drm_xe_sync __user *syncs_user; int num_syncs; struct xe_sync_entry *syncs; + size_t oa_buffer_size; + int wait_num_reports; }; struct xe_oa_config_bo { @@ -240,11 +234,9 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report) static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + u32 tail, hw_tail, partial_report_size, available; int report_size = stream->oa_buffer.format->size; - u32 tail, hw_tail; unsigned long flags; - bool pollin; - u32 partial_report_size; spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); @@ -288,12 +280,12 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) stream->oa_buffer.tail = tail; - pollin = xe_oa_circ_diff(stream, stream->oa_buffer.tail, - stream->oa_buffer.head) >= report_size; + available = xe_oa_circ_diff(stream, stream->oa_buffer.tail, stream->oa_buffer.head); + stream->pollin = available >= stream->wait_num_reports * report_size; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - return pollin; + return stream->pollin; } static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) @@ -301,10 +293,8 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) struct xe_oa_stream *stream = container_of(hrtimer, typeof(*stream), poll_check_timer); - if (xe_oa_buffer_check_unlocked(stream)) { - stream->pollin = true; + if (xe_oa_buffer_check_unlocked(stream)) 
wake_up(&stream->poll_wq); - } hrtimer_forward_now(hrtimer, ns_to_ktime(stream->poll_period_ns)); @@ -403,11 +393,19 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) { - struct xe_mmio *mmio = &stream->gt->mmio; u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT; + int size_exponent = __ffs(stream->oa_buffer.bo->size); + u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT; + struct xe_mmio *mmio = &stream->gt->mmio; unsigned long flags; + /* + * If oa buffer size is more than 16MB (exponent greater than 24), the + * oa buffer size field is multiplied by 8 in xe_oa_enable_metric_set. + */ + oa_buf |= REG_FIELD_PREP(OABUFFER_SIZE_MASK, + size_exponent > 24 ? size_exponent - 20 : size_exponent - 17); + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); xe_mmio_write32(mmio, __oa_regs(stream)->oa_status, 0); @@ -451,6 +449,12 @@ static u32 __oa_ccs_select(struct xe_oa_stream *stream) return val; } +static u32 __oactrl_used_bits(struct xe_oa_stream *stream) +{ + return stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ? + OAG_OACONTROL_USED_BITS : OAM_OACONTROL_USED_BITS; +} + static void xe_oa_enable(struct xe_oa_stream *stream) { const struct xe_oa_format *format = stream->oa_buffer.format; @@ -471,14 +475,14 @@ static void xe_oa_enable(struct xe_oa_stream *stream) stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) val |= OAG_OACONTROL_OA_PES_DISAG_EN; - xe_mmio_write32(&stream->gt->mmio, regs->oa_ctrl, val); + xe_mmio_rmw32(&stream->gt->mmio, regs->oa_ctrl, __oactrl_used_bits(stream), val); } static void xe_oa_disable(struct xe_oa_stream *stream) { struct xe_mmio *mmio = &stream->gt->mmio; - xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctrl, 0); + xe_mmio_rmw32(mmio, __oa_regs(stream)->oa_ctrl, __oactrl_used_bits(stream), 0); if (xe_mmio_wait32(mmio, __oa_regs(stream)->oa_ctrl, OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false)) drm_err(&stream->oa->xe->drm, @@ -596,19 +600,38 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait) return ret; } +static void xe_oa_lock_vma(struct xe_exec_queue *q) +{ + if (q->vm) { + down_read(&q->vm->lock); + xe_vm_lock(q->vm, false); + } +} + +static void xe_oa_unlock_vma(struct xe_exec_queue *q) +{ + if (q->vm) { + xe_vm_unlock(q->vm); + up_read(&q->vm->lock); + } +} + static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps, struct xe_bb *bb) { + struct xe_exec_queue *q = stream->exec_q ?: stream->k_exec_q; struct xe_sched_job *job; struct dma_fence *fence; int err = 0; - /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */ - job = xe_bb_create_job(stream->k_exec_q, bb); + xe_oa_lock_vma(q); + + job = xe_bb_create_job(q, bb); if (IS_ERR(job)) { err = PTR_ERR(job); goto exit; } + job->ggtt = true; if (deps == XE_OA_SUBMIT_ADD_DEPS) { for (int i = 0; i < stream->num_syncs && !err; i++) @@ -623,10 +646,13 @@ static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa fence = dma_fence_get(&job->drm.s_fence->finished); xe_sched_job_push(job); + xe_oa_unlock_vma(q); + return fence; err_put_job: xe_sched_job_put(job); exit: + xe_oa_unlock_vma(q); return ERR_PTR(err); } @@ -675,63 +701,19 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream) dma_fence_put(stream->last_fence); } -static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc, - struct 
xe_bb *bb, const struct flex *flex, u32 count) -{ - u32 offset = xe_bo_ggtt_addr(lrc->bo); - - do { - bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); - bb->cs[bb->len++] = offset + flex->offset * sizeof(u32); - bb->cs[bb->len++] = 0; - bb->cs[bb->len++] = flex->value; - - } while (flex++, --count); -} - -static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc, - const struct flex *flex, u32 count) +static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri, u32 count) { struct dma_fence *fence; struct xe_bb *bb; int err; - bb = xe_bb_new(stream->gt, 4 * count, false); + bb = xe_bb_new(stream->gt, 2 * count + 1, false); if (IS_ERR(bb)) { err = PTR_ERR(bb); goto exit; } - xe_oa_store_flex(stream, lrc, bb, flex, count); - - fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - goto free_bb; - } - xe_bb_free(bb, fence); - dma_fence_put(fence); - - return 0; -free_bb: - xe_bb_free(bb, NULL); -exit: - return err; -} - -static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri) -{ - struct dma_fence *fence; - struct xe_bb *bb; - int err; - - bb = xe_bb_new(stream->gt, 3, false); - if (IS_ERR(bb)) { - err = PTR_ERR(bb); - goto exit; - } - - write_cs_mi_lri(bb, reg_lri, 1); + write_cs_mi_lri(bb, reg_lri, count); fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); if (IS_ERR(fence)) { @@ -751,71 +733,55 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable) { const struct xe_oa_format *format = stream->oa_buffer.format; - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); - struct flex regs_context[] = { + struct xe_oa_reg reg_lri[] = { { OACTXCONTROL(stream->hwe->mmio_base), - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, enable ? OA_COUNTER_RESUME : 0, }, + { + OAR_OACONTROL, + oacontrol, + }, { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), - regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE), + _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, + enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) }, }; - struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol }; - int err; - /* Modify stream hwe context image with regs_context */ - err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], - regs_context, ARRAY_SIZE(regs_context)); - if (err) - return err; - - /* Apply reg_lri using LRI */ - return xe_oa_load_with_lri(stream, &reg_lri); + return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri)); }
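A note on the new sizing: the LRI batch is allocated as 2 * count + 1 dwords because MI_LOAD_REGISTER_IMM is one header dword followed by an offset/value pair per register. A sketch of what write_cs_mi_lri() is assumed to emit (the .addr.addr access assumes struct xe_oa_reg wraps a struct xe_reg plus a value, as the initializers above suggest):

static void emit_lri_sketch(struct xe_bb *bb, const struct xe_oa_reg *reg_lri, u32 n_regs)
{
	u32 i;

	bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(n_regs);
	for (i = 0; i < n_regs; i++) {
		bb->cs[bb->len++] = reg_lri[i].addr.addr;	/* register offset */
		bb->cs[bb->len++] = reg_lri[i].value;		/* immediate to load */
	}
}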
static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable) { const struct xe_oa_format *format = stream->oa_buffer.format; - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); - struct flex regs_context[] = { + struct xe_oa_reg reg_lri[] = { { OACTXCONTROL(stream->hwe->mmio_base), - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, enable ? OA_COUNTER_RESUME : 0, }, + { + OAC_OACONTROL, + oacontrol + }, { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), - regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) | + _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, + enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) | _MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? CTX_CTRL_RUN_ALONE : 0), }, }; - struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol }; - int err; /* Set ccs select to enable programming of OAC_OACONTROL */ xe_mmio_write32(&stream->gt->mmio, __oa_regs(stream)->oa_ctrl, __oa_ccs_select(stream)); - /* Modify stream hwe context image with regs_context */ - err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], - regs_context, ARRAY_SIZE(regs_context)); - if (err) - return err; - - /* Apply reg_lri using LRI */ - return xe_oa_load_with_lri(stream, &reg_lri); + return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri)); } static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable) @@ -901,15 +867,12 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) xe_file_put(stream->xef); } -static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) +static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) { struct xe_bo *bo; - BUILD_BUG_ON_NOT_POWER_OF_2(XE_OA_BUFFER_SIZE); - BUILD_BUG_ON(XE_OA_BUFFER_SIZE < SZ_128K || XE_OA_BUFFER_SIZE > SZ_16M); - bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL, - XE_OA_BUFFER_SIZE, ttm_bo_type_kernel, + size, ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -1087,6 +1050,13 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream) 0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS); } +static u32 oag_buf_size_select(const struct xe_oa_stream *stream) +{ + return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT, + stream->oa_buffer.bo->size > SZ_16M ? + OAG_OA_DEBUG_BUF_SIZE_SELECT : 0); +} + static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) { struct xe_mmio *mmio = &stream->gt->mmio; @@ -1119,6 +1089,7 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug, _MASKED_BIT_ENABLE(oa_debug) | oag_report_ctx_switches(stream) | + oag_buf_size_select(stream) | oag_configure_mmio_trigger(stream, true)); xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
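Putting the two sizing hunks together: the OABUFFER size field holds a biased exponent, and oag_buf_size_select() additionally sets OAG_OA_DEBUG_BUF_SIZE_SELECT for buffers above 16M, which scales the decoded size by 8. A worked sketch of the encoding as described in the comments above (assuming size is one of the power-of-two values accepted at open time):

static u32 oa_buf_size_field_sketch(size_t size)
{
	int exp = __ffs(size);	/* power of two, enforced at open time */

	/* above 16M (exp > 24) the bias changes from 17 to 20 */
	return exp > 24 ? exp - 20 : exp - 17;
}

/* e.g. SZ_128K -> exp 17 -> field 0; SZ_16M -> exp 24 -> field 7;
 * SZ_128M -> exp 27 -> field 7 again, this time with BUF_SIZE_SELECT set.
 */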
@@ -1260,6 +1231,28 @@ static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value, return 0; } +static int xe_oa_set_prop_oa_buffer_size(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + if (!is_power_of_2(value) || value < SZ_128K || value > SZ_128M) { + drm_dbg(&oa->xe->drm, "OA buffer size invalid %llu\n", value); + return -EINVAL; + } + param->oa_buffer_size = value; + return 0; +} + +static int xe_oa_set_prop_wait_num_reports(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + if (!value) { + drm_dbg(&oa->xe->drm, "wait_num_reports %llu\n", value); + return -EINVAL; + } + param->wait_num_reports = value; + return 0; +} + static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value, struct xe_oa_open_param *param) { @@ -1280,6 +1273,8 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = { [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, + [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_oa_buffer_size, + [DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_wait_num_reports, }; static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = { @@ -1294,6 +1289,8 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = { [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval, [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, + [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_ret_inval, }; static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from, @@ -1553,7 +1550,7 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) { - struct drm_xe_oa_stream_info info = { .oa_buf_size = XE_OA_BUFFER_SIZE, }; + struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, }; void __user *uaddr = (void __user *)arg; if (copy_to_user(uaddr, &info, sizeof(info))) @@ -1639,7 +1636,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) } /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ - if (vma->vm_end - vma->vm_start != XE_OA_BUFFER_SIZE) { + if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) { drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); return -EINVAL; } @@ -1677,81 +1674,6 @@ static const struct file_operations xe_oa_fops = { .mmap = xe_oa_mmap, }; -static bool engine_supports_mi_query(struct xe_hw_engine *hwe) -{ - return hwe->class == XE_ENGINE_CLASS_RENDER || - hwe->class == XE_ENGINE_CLASS_COMPUTE; -} - -static bool xe_oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end) -{ - u32 idx = *offset; - u32 len = min(MI_LRI_LEN(state[idx]) + idx, end); - bool found = false; - - idx++; - for (; idx < len; idx += 2) { - if (state[idx] == reg) { - found = true; - break; - } - } - - *offset = idx; - return found; -} - -#define IS_MI_LRI_CMD(x) (REG_FIELD_GET(MI_OPCODE, (x)) == \ - REG_FIELD_GET(MI_OPCODE, MI_LOAD_REGISTER_IMM)) - -static u32 xe_oa_context_image_offset(struct xe_oa_stream *stream, u32 reg) -{ - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 len = (xe_gt_lrc_size(stream->gt, stream->hwe->class) + - lrc->ring.size) / sizeof(u32); - u32 offset = xe_lrc_regs_offset(lrc) / sizeof(u32); - u32 
*state = (u32 *)lrc->bo->vmap.vaddr; - if (drm_WARN_ON(&stream->oa->xe->drm, !state)) - return U32_MAX; - - for (; offset < len; ) { - if (IS_MI_LRI_CMD(state[offset])) { - /* - * We expect reg-value pairs in MI_LRI command, so - * MI_LRI_LEN() should be even - */ - drm_WARN_ON(&stream->oa->xe->drm, - MI_LRI_LEN(state[offset]) & 0x1); - - if (xe_oa_find_reg_in_lri(state, reg, &offset, len)) - break; - } else { - offset++; - } - } - - return offset < len ? offset : U32_MAX; -} - -static int xe_oa_set_ctx_ctrl_offset(struct xe_oa_stream *stream) -{ - struct xe_reg reg = OACTXCONTROL(stream->hwe->mmio_base); - u32 offset = stream->oa->ctx_oactxctrl_offset[stream->hwe->class]; - - /* Do this only once. Failure is stored as offset of U32_MAX */ - if (offset) - goto exit; - - offset = xe_oa_context_image_offset(stream, reg.addr); - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] = offset; - - drm_dbg(&stream->oa->xe->drm, "%s oa ctx control at 0x%08x dword offset\n", - stream->hwe->name, offset); -exit: - return offset && offset != U32_MAX ? 0 : -ENODEV; -} - static int xe_oa_stream_init(struct xe_oa_stream *stream, struct xe_oa_open_param *param) { @@ -1770,6 +1692,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, stream->periodic = param->period_exponent > 0; stream->period_exponent = param->period_exponent; stream->no_preempt = param->no_preempt; + stream->wait_num_reports = param->wait_num_reports; stream->xef = xe_file_get(param->xef); stream->num_syncs = param->num_syncs; @@ -1783,20 +1706,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, if (GRAPHICS_VER(stream->oa->xe) >= 20 && stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) stream->oa_buffer.circ_size = - XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size; + param->oa_buffer_size - + param->oa_buffer_size % stream->oa_buffer.format->size; else - stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE; - - if (stream->exec_q && engine_supports_mi_query(stream->hwe)) { - /* If we don't find the context offset, just return error */ - ret = xe_oa_set_ctx_ctrl_offset(stream); - if (ret) { - drm_err(&stream->oa->xe->drm, - "xe_oa_set_ctx_ctrl_offset failed for %s\n", - stream->hwe->name); - goto exit; - } - } + stream->oa_buffer.circ_size = param->oa_buffer_size; stream->oa_config = xe_oa_get_oa_config(stream->oa, param->metric_set); if (!stream->oa_config) { @@ -1828,7 +1741,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, goto err_fw_put; } - ret = xe_oa_alloc_oa_buffer(stream); + ret = xe_oa_alloc_oa_buffer(stream, param->oa_buffer_size); if (ret) goto err_fw_put; @@ -2066,8 +1979,8 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) return -ENOENT; - if (param.exec_q->width > 1) - drm_dbg(&oa->xe->drm, "exec_q->width > 1, programming only exec_q->lrc[0]\n"); + if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1)) + return -EOPNOTSUPP; } /* @@ -2125,6 +2038,17 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz); } + if (!param.oa_buffer_size) + param.oa_buffer_size = DEFAULT_XE_OA_BUFFER_SIZE; + + if (!param.wait_num_reports) + param.wait_num_reports = 1; + if (param.wait_num_reports > param.oa_buffer_size / f->size) { + drm_dbg(&oa->xe->drm, "wait_num_reports %d\n", param.wait_num_reports); + ret = -EINVAL; + goto err_exec_q; + } + ret = xe_oa_parse_syncs(oa, &param); if (ret) goto err_exec_q;
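The defaulting and bounds check above tie the two new uAPI properties together: wait_num_reports may not exceed the number of reports that fit in the chosen buffer. A worked example, assuming a hypothetical 256-byte report format:

size_t oa_buffer_size = SZ_128K;		/* smallest size the property accepts */
size_t report_size = 256;			/* hypothetical format size */
u32 max_wait = oa_buffer_size / report_size;	/* 512 reports fit */

/* wait_num_reports in [1, 512] passes the check; 600 fails with -EINVAL */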
@@ -2242,6 +2166,7 @@ static const struct xe_mmio_range xe2_oa_mux_regs[] = { { .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ { .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */ { .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ + { .start = 0xD0E0, .end = 0xD0F4 }, /* VISACTL */ { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ { .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */ { .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */ @@ -2612,6 +2537,8 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt) u->type = DRM_XE_OA_UNIT_TYPE_OAM; } + xe_mmio_write32(&gt->mmio, u->regs.oa_ctrl, 0); + /* Ensure MMIO trigger remains disabled till there is a stream */ xe_mmio_write32(&gt->mmio, u->regs.oa_debug, oag_configure_mmio_trigger(NULL, false)); diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index fea9d981e414..52e33c37d5ee 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -15,7 +15,7 @@ #include "regs/xe_reg_defs.h" #include "xe_hw_engine_types.h" -#define XE_OA_BUFFER_SIZE SZ_16M +#define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M enum xe_oa_report_header { HDR_32_BIT = 0, @@ -138,9 +138,6 @@ struct xe_oa { /** @metrics_idr: List of dynamic configurations (struct xe_oa_config) */ struct idr metrics_idr; - /** @ctx_oactxctrl_offset: offset of OACTXCONTROL register in context image */ - u32 ctx_oactxctrl_offset[XE_ENGINE_CLASS_MAX]; - /** @oa_formats: tracks all OA formats across platforms */ const struct xe_oa_format *oa_formats; @@ -218,6 +215,9 @@ struct xe_oa_stream { /** @pollin: Whether there is data available to read */ bool pollin; + /** @wait_num_reports: Number of reports to wait for before signalling pollin */ + int wait_num_reports; + /** @periodic: Whether periodic sampling is currently enabled */ bool periodic; diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 7e64428f518e..30fdbdb9341e 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -100,6 +100,10 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = { * Reserved entries should be programmed with the maximum caching, minimum * coherency (which matches an all-0's encoding), so we can just omit them * in the table. + * + * Note: There is an implicit assumption in the driver that compression and + * coh_1way+ are mutually exclusive. If this is ever not true then userptr + * and imported dma-buf from external devices will have uncleared ccs state. */ #define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, __coh_mode) \ { \
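The coh_mode change in the next hunk enforces the mutual-exclusion note above at compile time: BUILD_BUG_ON_ZERO(e) is an expression that evaluates to 0 but breaks the build when e is a nonzero constant, so it can be OR-ed into an initializer without changing the value. A standalone illustration of the idiom (not driver code):

#include <linux/build_bug.h>

#define COH_OR_FAIL(coh, comp) \
	((BUILD_BUG_ON_ZERO((coh) && (comp)) || (coh)) ? 1 : 0)

static const int ok = COH_OR_FAIL(1, 0);	/* compiles; value is 1 */
/* static const int bad = COH_OR_FAIL(1, 1);	-- would fail the build */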
@@ -109,7 +113,8 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = { REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \ REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \ REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \ - .coh_mode = __coh_mode ? XE_COH_AT_LEAST_1WAY : XE_COH_NONE \ + .coh_mode = (BUILD_BUG_ON_ZERO(__coh_mode && comp_en) || __coh_mode) ? \ + XE_COH_AT_LEAST_1WAY : XE_COH_NONE \ } static const struct xe_pat_table_entry xe2_pat_table[] = { diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 7d146e3e8e21..87ae2cf943b4 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -30,6 +30,7 @@ #include "xe_pm.h" #include "xe_sriov.h" #include "xe_step.h" +#include "xe_survivability_mode.h" #include "xe_tile.h" enum toggle_d3cold { @@ -61,7 +62,6 @@ struct xe_device_desc { u8 has_heci_gscfi:1; u8 has_heci_cscfi:1; u8 has_llc:1; - u8 has_mmio_ext:1; u8 has_sriov:1; u8 skip_guc_pc:1; u8 skip_mtcfg:1; @@ -241,7 +241,6 @@ static const struct xe_device_desc adl_s_desc = { PLATFORM(ALDERLAKE_S), .has_display = true, .has_llc = true, - .has_sriov = IS_ENABLED(CONFIG_DRM_XE_DEBUG), .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids }, @@ -257,7 +256,6 @@ static const struct xe_device_desc adl_p_desc = { PLATFORM(ALDERLAKE_P), .has_display = true, .has_llc = true, - .has_sriov = IS_ENABLED(CONFIG_DRM_XE_DEBUG), .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids }, @@ -271,7 +269,6 @@ static const struct xe_device_desc adl_n_desc = { PLATFORM(ALDERLAKE_N), .has_display = true, .has_llc = true, - .has_sriov = IS_ENABLED(CONFIG_DRM_XE_DEBUG), .require_force_probe = true, }; @@ -310,7 +307,6 @@ static const struct xe_device_desc ats_m_desc = { DG2_FEATURES, .has_display = false, - .has_sriov = IS_ENABLED(CONFIG_DRM_XE_DEBUG), }; static const struct xe_device_desc dg2_desc = { @@ -322,7 +318,7 @@ static const struct xe_device_desc dg2_desc = { .has_display = true, }; -static const struct xe_device_desc pvc_desc = { +static const __maybe_unused struct xe_device_desc pvc_desc = { .graphics = &graphics_xehpc, DGFX_FEATURES, PLATFORM(PVC), @@ -353,6 +349,7 @@ static const struct xe_device_desc bmg_desc = { static const struct xe_device_desc ptl_desc = { PLATFORM(PANTHERLAKE), .has_display = true, + .has_sriov = true, .require_force_probe = true, }; @@ -397,7 +394,6 @@ static const struct pci_device_id pciidlist[] = { INTEL_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc), INTEL_ARL_IDS(INTEL_VGA_DEVICE, &mtl_desc), INTEL_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), - INTEL_PVC_IDS(INTEL_VGA_DEVICE, &pvc_desc), INTEL_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc), INTEL_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc), INTEL_BMG_IDS(INTEL_VGA_DEVICE, &bmg_desc), @@ -495,7 +491,7 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, * least basic xe_gt and xe_guc initialization. * * Since to obtain the value of GMDID_MEDIA we need to use the - * media GuC, temporarly tweak the gt type. + * media GuC, temporarily tweak the gt type.
*/ xe_gt_assert(gt, gt->info.type == XE_GT_TYPE_UNINITIALIZED); @@ -507,6 +503,7 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, gt->info.type = XE_GT_TYPE_MAIN; } + xe_gt_mmio_init(gt); xe_guc_comm_init_early(&gt->uc.guc); /* Don't bother with GMDID if failed to negotiate the GuC ABI */ @@ -622,7 +619,6 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.has_heci_gscfi = desc->has_heci_gscfi; xe->info.has_heci_cscfi = desc->has_heci_cscfi; xe->info.has_llc = desc->has_llc; - xe->info.has_mmio_ext = desc->has_mmio_ext; xe->info.has_sriov = desc->has_sriov; xe->info.skip_guc_pc = desc->skip_guc_pc; xe->info.skip_mtcfg = desc->skip_mtcfg; @@ -682,7 +678,6 @@ static int xe_info_init(struct xe_device *xe, xe->info.graphics_name = graphics_desc->name; xe->info.media_name = media_desc ? media_desc->name : "none"; - xe->info.tile_mmio_ext_size = graphics_desc->tile_mmio_ext_size; xe->info.dma_mask_size = graphics_desc->dma_mask_size; xe->info.vram_flags = graphics_desc->vram_flags; @@ -768,6 +763,9 @@ static void xe_pci_remove(struct pci_dev *pdev) if (IS_SRIOV_PF(xe)) xe_pci_sriov_configure(pdev, 0); + if (xe_survivability_mode_enabled(xe)) + return xe_survivability_mode_remove(xe); + xe_device_remove(xe); xe_pm_runtime_fini(xe); pci_set_drvdata(pdev, NULL); @@ -786,7 +784,7 @@ static void xe_pci_remove(struct pci_dev *pdev) * error injectable functions is proper handling of the error code by the * caller for recovery, which is always the case here. The second * requirement is that no state is changed before the first error return. - * It is not strictly fullfilled for all initialization functions using the + * It is not strictly fulfilled for all initialization functions using the * ALLOW_ERROR_INJECTION() macro but this is acceptable because for those * error cases at probe time, the error code is simply propagated up by the * caller. Therefore there is no consequence on those specific callers when @@ -798,7 +796,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { const struct xe_subplatform_desc *subplatform_desc; struct xe_device *xe; int err; if (desc->require_force_probe && !id_forced(pdev->device)) { dev_info(&pdev->dev, "Your graphics device %04x is not officially supported\n" @@ -840,8 +838,19 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return err; err = xe_device_probe_early(xe); - if (err) + + /* + * In Boot Survivability mode, no drm card is exposed + * and the driver is loaded with the bare minimum to allow + * the firmware to be flashed through mei. Return + * success if survivability mode is enabled.
+ */ + if (err) { + if (xe_survivability_mode_enabled(xe)) + return 0; + return err; + } err = xe_info_init(xe, desc->graphics, desc->media); if (err) @@ -928,9 +937,13 @@ static void d3cold_toggle(struct pci_dev *pdev, enum toggle_d3cold toggle) static int xe_pci_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); int err; - err = xe_pm_suspend(pdev_to_xe_device(pdev)); + if (xe_survivability_mode_enabled(xe)) + return -EBUSY; + + err = xe_pm_suspend(xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index 79b0f80376a4..873efec5cdee 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -20,8 +20,6 @@ struct xe_graphics_desc { u64 hw_engine_mask; /* hardware engines provided by graphics IP */ - u32 tile_mmio_ext_size; /* size of MMIO extension space, per-tile */ - u8 max_remote_tiles:2; u8 has_asid:1; diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index d95d9835de42..9333ce776a6e 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -217,7 +217,7 @@ int xe_pcode_request(struct xe_tile *tile, u32 mbox, u32 request, * * It returns 0 on success, and -ERROR number on failure, -EINVAL if max * frequency is higher then the minimal, and other errors directly translated - * from the PCODE Error returs: + * from the PCODE Error returns: * - -ENXIO: "Illegal Command" * - -ETIMEDOUT: "Timed out" * - -EINVAL: "Illegal Data" diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index f153ce96f69a..2bae9afdbd35 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -49,6 +49,20 @@ /* Domain IDs (param2) */ #define PCODE_MBOX_DOMAIN_HBM 0x2 +#define PCODE_SCRATCH(x) XE_REG(0x138320 + ((x) * 4)) +/* PCODE_SCRATCH0 */ +#define AUXINFO_REG_OFFSET REG_GENMASK(17, 15) +#define OVERFLOW_REG_OFFSET REG_GENMASK(14, 12) +#define HISTORY_TRACKING REG_BIT(11) +#define OVERFLOW_SUPPORT REG_BIT(10) +#define AUXINFO_SUPPORT REG_BIT(9) +#define BOOT_STATUS REG_GENMASK(3, 1) +#define CRITICAL_FAILURE 4 +#define NON_CRITICAL_FAILURE 7 + +/* Auxiliary info bits */ +#define AUXINFO_HISTORY_OFFSET REG_GENMASK(31, 29) + struct pcode_err_decode { int errno; const char *str; diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index a6761cb769b2..e434b0b1870b 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -7,6 +7,7 @@ #include #include +#include <linux/suspend.h> #include #include @@ -390,7 +391,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) /* * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify - * also checks and delets bo entry from user fault list. + * also checks and deletes bo entry from user fault list.
*/ mutex_lock(&xe->mem_access.vram_userfault.lock); list_for_each_entry_safe(bo, on, @@ -414,8 +415,8 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_irq_suspend(xe); - xe_display_pm_runtime_suspend_late(xe); - + if (xe->d3cold.allowed) + xe_display_pm_suspend_late(xe); out: if (err) xe_display_pm_runtime_resume(xe); @@ -607,7 +608,8 @@ static bool xe_pm_suspending_or_resuming(struct xe_device *xe) struct device *dev = xe->drm.dev; return dev->power.runtime_status == RPM_SUSPENDING || - dev->power.runtime_status == RPM_RESUMING; + dev->power.runtime_status == RPM_RESUMING || + pm_suspend_target_state != PM_SUSPEND_ON; #else return false; #endif diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c new file mode 100644 index 000000000000..3910a82328ee --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pmu.c @@ -0,0 +1,374 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include +#include + +#include "xe_device.h" +#include "xe_gt_idle.h" +#include "xe_pm.h" +#include "xe_pmu.h" + +/** + * DOC: Xe PMU (Performance Monitoring Unit) + * + * Expose events/counters like GT-C6 residency and GT frequency to user land via + * the perf interface. Events are per device. The GT can be selected with an + * extra config sub-field (bits 60-63). + * + * All events are listed in sysfs: + * + * $ ls -ld /sys/bus/event_source/devices/xe_* + * $ ls /sys/bus/event_source/devices/xe_0000_00_02.0/events/ + * $ ls /sys/bus/event_source/devices/xe_0000_00_02.0/format/ + * + * The format directory has info regarding the configs that can be used. + * The standard perf tool can be used to grep for a certain event as well. + * Example: + * + * $ perf list | grep gt-c6 + * + * To sample a specific event for a GT at regular intervals: + * + * $ perf stat -e <event_name,gt=> -I <interval> + */ + +#define XE_PMU_EVENT_GT_MASK GENMASK_ULL(63, 60) +#define XE_PMU_EVENT_ID_MASK GENMASK_ULL(11, 0) + +static unsigned int config_to_event_id(u64 config) +{ + return FIELD_GET(XE_PMU_EVENT_ID_MASK, config); +} + +static unsigned int config_to_gt_id(u64 config) +{ + return FIELD_GET(XE_PMU_EVENT_GT_MASK, config); +} + +#define XE_PMU_EVENT_GT_C6_RESIDENCY 0x01 + +static struct xe_gt *event_to_gt(struct perf_event *event) +{ + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); + u64 gt = config_to_gt_id(event->attr.config); + + return xe_device_get_gt(xe, gt); +} + +static bool event_supported(struct xe_pmu *pmu, unsigned int gt, + unsigned int id) +{ + if (gt >= XE_MAX_GT_PER_TILE) + return false; + + return id < sizeof(pmu->supported_events) * BITS_PER_BYTE && + pmu->supported_events & BIT_ULL(id); +} + +static void xe_pmu_event_destroy(struct perf_event *event) +{ + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); + + drm_WARN_ON(&xe->drm, event->parent); + xe_pm_runtime_put(xe); + drm_dev_put(&xe->drm); +} + +static int xe_pmu_event_init(struct perf_event *event) +{ + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); + struct xe_pmu *pmu = &xe->pmu; + unsigned int id, gt; + + if (!pmu->registered) + return -ENODEV; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* unsupported modes and filters */ + if (event->attr.sample_period) /* no sampling */ + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + gt = config_to_gt_id(event->attr.config); + id = config_to_event_id(event->attr.config); + if (!event_supported(pmu, gt, id)) + return -ENOENT; + + if (has_branch_stack(event)) + return -EOPNOTSUPP; +
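To make the DOC block above concrete, here is a hedged userspace sketch that reads the gt-c6-residency counter once via perf_event_open(2). The PMU's dynamic type must be read from sysfs at runtime, and config packs the event id in bits 0-11 with the GT index in bits 60-63, per the format attributes registered below:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr = {
		.size = sizeof(attr),
		.type = 42,	/* placeholder: read /sys/bus/event_source/devices/xe_<bdf>/type */
		.config = (0ULL << 60) | 0x01,	/* GT 0, gt-c6-residency */
	};
	uint64_t count;
	int fd;

	/* system-wide PMU: pid = -1 and a valid cpu are required */
	fd = syscall(SYS_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;
	if (read(fd, &count, sizeof(count)) != sizeof(count))
		return 1;
	printf("gt-c6 residency: %llu ms\n", (unsigned long long)count);
	close(fd);
	return 0;
}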
+ if (!event->parent) { + drm_dev_get(&xe->drm); + xe_pm_runtime_get(xe); + event->destroy = xe_pmu_event_destroy; + } + + return 0; +} + +static u64 __xe_pmu_event_read(struct perf_event *event) +{ + struct xe_gt *gt = event_to_gt(event); + + if (!gt) + return 0; + + switch (config_to_event_id(event->attr.config)) { + case XE_PMU_EVENT_GT_C6_RESIDENCY: + return xe_gt_idle_residency_msec(&gt->gtidle); + } + + return 0; +} + +static void xe_pmu_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev, new; + + prev = local64_read(&hwc->prev_count); + do { + new = __xe_pmu_event_read(event); + } while (!local64_try_cmpxchg(&hwc->prev_count, &prev, new)); + + local64_add(new - prev, &event->count); +} + +static void xe_pmu_event_read(struct perf_event *event) +{ + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); + struct xe_pmu *pmu = &xe->pmu; + + if (!pmu->registered) { + event->hw.state = PERF_HES_STOPPED; + return; + } + + xe_pmu_event_update(event); +} + +static void xe_pmu_enable(struct perf_event *event) +{ + /* + * Store the current counter value so we can report the correct delta + * for all listeners. Even when the event was already enabled and has + * an existing non-zero value. + */ + local64_set(&event->hw.prev_count, __xe_pmu_event_read(event)); +} + +static void xe_pmu_event_start(struct perf_event *event, int flags) +{ + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); + struct xe_pmu *pmu = &xe->pmu; + + if (!pmu->registered) + return; + + xe_pmu_enable(event); + event->hw.state = 0; +} + +static void xe_pmu_event_stop(struct perf_event *event, int flags) +{ + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); + struct xe_pmu *pmu = &xe->pmu; + + if (pmu->registered) + if (flags & PERF_EF_UPDATE) + xe_pmu_event_update(event); + + event->hw.state = PERF_HES_STOPPED; +} + +static int xe_pmu_event_add(struct perf_event *event, int flags) +{ + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); + struct xe_pmu *pmu = &xe->pmu; + + if (!pmu->registered) + return -ENODEV; + + if (flags & PERF_EF_START) + xe_pmu_event_start(event, flags); + + return 0; +} + +static void xe_pmu_event_del(struct perf_event *event, int flags) +{ + xe_pmu_event_stop(event, PERF_EF_UPDATE); +} + +PMU_FORMAT_ATTR(gt, "config:60-63"); +PMU_FORMAT_ATTR(event, "config:0-11"); + +static struct attribute *pmu_format_attrs[] = { + &format_attr_event.attr, + &format_attr_gt.attr, + NULL, +}; + +static const struct attribute_group pmu_format_attr_group = { + .name = "format", + .attrs = pmu_format_attrs, +}; + +static ssize_t event_attr_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct perf_pmu_events_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_attr, attr); + + return sprintf(buf, "event=%#04llx\n", pmu_attr->id); +} + +#define XE_EVENT_ATTR(name_, v_, id_) \ + PMU_EVENT_ATTR(name_, pmu_event_ ## v_, id_, event_attr_show) + +#define XE_EVENT_ATTR_UNIT(name_, v_, unit_) \ + PMU_EVENT_ATTR_STRING(name_.unit, pmu_event_unit_ ## v_, unit_) + +#define XE_EVENT_ATTR_GROUP(v_, id_, ...)
\ + static struct attribute *pmu_attr_ ##v_[] = { \ + __VA_ARGS__, \ + NULL \ + }; \ + static umode_t is_visible_##v_(struct kobject *kobj, \ + struct attribute *attr, int idx) \ + { \ + struct perf_pmu_events_attr *pmu_attr; \ + struct xe_pmu *pmu; \ + \ + pmu_attr = container_of(attr, typeof(*pmu_attr), attr.attr); \ + pmu = container_of(dev_get_drvdata(kobj_to_dev(kobj)), \ + typeof(*pmu), base); \ + \ + return event_supported(pmu, 0, id_) ? attr->mode : 0; \ + } \ + static const struct attribute_group pmu_group_ ##v_ = { \ + .name = "events", \ + .attrs = pmu_attr_ ## v_, \ + .is_visible = is_visible_ ## v_, \ + } + +#define XE_EVENT_ATTR_SIMPLE(name_, v_, id_, unit_) \ + XE_EVENT_ATTR(name_, v_, id_) \ + XE_EVENT_ATTR_UNIT(name_, v_, unit_) \ + XE_EVENT_ATTR_GROUP(v_, id_, &pmu_event_ ##v_.attr.attr, \ + &pmu_event_unit_ ##v_.attr.attr) + +#define XE_EVENT_ATTR_NOUNIT(name_, v_, id_) \ + XE_EVENT_ATTR(name_, v_, id_) \ + XE_EVENT_ATTR_GROUP(v_, id_, &pmu_event_ ##v_.attr.attr) + +XE_EVENT_ATTR_SIMPLE(gt-c6-residency, gt_c6_residency, XE_PMU_EVENT_GT_C6_RESIDENCY, "ms"); + +static struct attribute *pmu_empty_event_attrs[] = { + /* Empty - all events are added as groups with .attr_update() */ + NULL, +}; + +static const struct attribute_group pmu_events_attr_group = { + .name = "events", + .attrs = pmu_empty_event_attrs, +}; + +static const struct attribute_group *pmu_events_attr_update[] = { + &pmu_group_gt_c6_residency, + NULL, +}; + +static void set_supported_events(struct xe_pmu *pmu) +{ + struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); + + if (!xe->info.skip_guc_pc) + pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_C6_RESIDENCY); +} + +/** + * xe_pmu_unregister() - Remove/cleanup PMU registration + * @arg: Ptr to pmu + */ +static void xe_pmu_unregister(void *arg) +{ + struct xe_pmu *pmu = arg; + struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); + + if (!pmu->registered) + return; + + pmu->registered = false; + + perf_pmu_unregister(&pmu->base); + kfree(pmu->name); +} + +/** + * xe_pmu_register() - Define basic PMU properties for Xe and add event callbacks. + * @pmu: the PMU object + * + * Returns 0 on success and an appropriate error code otherwise + */ +int xe_pmu_register(struct xe_pmu *pmu) +{ + struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); + static const struct attribute_group *attr_groups[] = { + &pmu_format_attr_group, + &pmu_events_attr_group, + NULL + }; + int ret = -ENOMEM; + char *name; + + BUILD_BUG_ON(XE_MAX_GT_PER_TILE != XE_PMU_MAX_GT); + + if (IS_SRIOV_VF(xe)) + return 0; + + name = kasprintf(GFP_KERNEL, "xe_%s", + dev_name(xe->drm.dev)); + if (!name) + goto err; + + /* tools/perf reserves colons as special. 
*/ + strreplace(name, ':', '_'); + + pmu->name = name; + pmu->base.attr_groups = attr_groups; + pmu->base.attr_update = pmu_events_attr_update; + pmu->base.scope = PERF_PMU_SCOPE_SYS_WIDE; + pmu->base.module = THIS_MODULE; + pmu->base.task_ctx_nr = perf_invalid_context; + pmu->base.event_init = xe_pmu_event_init; + pmu->base.add = xe_pmu_event_add; + pmu->base.del = xe_pmu_event_del; + pmu->base.start = xe_pmu_event_start; + pmu->base.stop = xe_pmu_event_stop; + pmu->base.read = xe_pmu_event_read; + + set_supported_events(pmu); + + ret = perf_pmu_register(&pmu->base, pmu->name, -1); + if (ret) + goto err_name; + + pmu->registered = true; + + return devm_add_action_or_reset(xe->drm.dev, xe_pmu_unregister, pmu); + +err_name: + kfree(name); +err: + drm_err(&xe->drm, "Failed to register PMU (ret=%d)!\n", ret); + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_pmu.h b/drivers/gpu/drm/xe/xe_pmu.h new file mode 100644 index 000000000000..60c37126f87e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pmu.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PMU_H_ +#define _XE_PMU_H_ + +#include "xe_pmu_types.h" + +#if IS_ENABLED(CONFIG_PERF_EVENTS) +int xe_pmu_register(struct xe_pmu *pmu); +#else +static inline int xe_pmu_register(struct xe_pmu *pmu) { return 0; } +#endif + +#endif + diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/xe/xe_pmu_types.h new file mode 100644 index 000000000000..f5ba4d56622c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pmu_types.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PMU_TYPES_H_ +#define _XE_PMU_TYPES_H_ + +#include +#include + +#define XE_PMU_MAX_GT 2 + +/** + * struct xe_pmu - PMU related data per Xe device + * + * Stores per device PMU info that includes event/perf attributes and sampling + * counters across all GTs for this device. + */ +struct xe_pmu { + /** + * @base: PMU base. + */ + struct pmu base; + /** + * @registered: PMU is registered and not in the unregistering process. + */ + bool registered; + /** + * @name: Name as registered with perf core. + */ + const char *name; + /** + * @supported_events: Bitmap of supported events, indexed by event id + */ + u64 supported_events; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 65c3c1688710..1ddcc7e79a93 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -276,7 +276,7 @@ struct xe_pt_stage_bind_walk { /* Also input, but is updated during the walk*/ /** @curs: The DMA address cursor. 
*/ struct xe_res_cursor *curs; - /** @va_curs_start: The Virtual address coresponding to @curs->start */ + /** @va_curs_start: The Virtual address corresponding to @curs->start */ u64 va_curs_start; /* Output */ diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 3eda616f1502..c059639613f7 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -671,7 +671,9 @@ static int query_oa_units(struct xe_device *xe, du->oa_unit_id = u->oa_unit_id; du->oa_unit_type = u->type; du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); - du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS; + du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS | + DRM_XE_OA_CAPS_OA_BUFFER_SIZE | + DRM_XE_OA_CAPS_WAIT_NUM_REPORTS; j = 0; for_each_hw_engine(hwe, gt, hwe_id) { diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index c13123008e90..9475e3f74958 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -24,7 +24,6 @@ #include "xe_hw_engine_types.h" #include "xe_macros.h" #include "xe_mmio.h" -#include "xe_reg_whitelist.h" #include "xe_rtp_types.h" static void reg_sr_fini(struct drm_device *drm, void *arg) @@ -192,58 +191,6 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n"); } -void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe) -{ - struct xe_reg_sr *sr = &hwe->reg_whitelist; - struct xe_gt *gt = hwe->gt; - struct xe_device *xe = gt_to_xe(gt); - struct xe_reg_sr_entry *entry; - struct drm_printer p; - u32 mmio_base = hwe->mmio_base; - unsigned long reg; - unsigned int slot = 0; - unsigned int fw_ref; - - if (xa_empty(&sr->xa)) - return; - - drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name); - - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) - goto err_force_wake; - - p = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL); - xa_for_each(&sr->xa, reg, entry) { - if (slot == RING_MAX_NONPRIV_SLOTS) { - xe_gt_err(gt, - "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n", - hwe->name, RING_MAX_NONPRIV_SLOTS); - break; - } - - xe_reg_whitelist_print_entry(&p, 0, reg, entry); - xe_mmio_write32(>->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), - reg | entry->set_bits); - slot++; - } - - /* And clear the rest just in case of garbage */ - for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) { - u32 addr = RING_NOPID(mmio_base).addr; - - xe_mmio_write32(>->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr); - } - - xe_force_wake_put(gt_to_fw(gt), fw_ref); - - return; - -err_force_wake: - xe_force_wake_put(gt_to_fw(gt), fw_ref); - drm_err(&xe->drm, "Failed to apply, err=-ETIMEDOUT\n"); -} - /** * xe_reg_sr_dump - print all save/restore entries * @sr: Save/restore entries diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 3996934974fa..edab5d4e3ba5 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -10,7 +10,9 @@ #include "regs/xe_oa_regs.h" #include "regs/xe_regs.h" #include "xe_gt_types.h" +#include "xe_gt_printk.h" #include "xe_platform_types.h" +#include "xe_reg_sr.h" #include "xe_rtp.h" #include "xe_step.h" @@ -89,6 +91,40 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { {} }; +static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe) +{ + struct xe_reg_sr *sr = &hwe->reg_whitelist; + struct xe_reg_sr_entry 
*entry; + struct drm_printer p; + unsigned long reg; + unsigned int slot; + + xe_gt_dbg(hwe->gt, "Add %s whitelist to engine\n", sr->name); + p = xe_gt_dbg_printer(hwe->gt); + + slot = 0; + xa_for_each(&sr->xa, reg, entry) { + struct xe_reg_sr_entry hwe_entry = { + .reg = RING_FORCE_TO_NONPRIV(hwe->mmio_base, slot), + .set_bits = entry->reg.addr | entry->set_bits, + .clr_bits = ~0u, + .read_mask = entry->read_mask, + }; + + if (slot == RING_MAX_NONPRIV_SLOTS) { + xe_gt_err(hwe->gt, + "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n", + hwe->name, RING_MAX_NONPRIV_SLOTS); + break; + } + + xe_reg_whitelist_print_entry(&p, 0, reg, entry); + xe_reg_sr_add(&hwe->reg_sr, &hwe_entry, hwe->gt); + + slot++; + } +} + /** * xe_reg_whitelist_process_engine - process table of registers to whitelist * @hwe: engine instance to process whitelist for @@ -102,6 +138,7 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe) struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); xe_rtp_process_to_sr(&ctx, register_whitelist, &hwe->reg_whitelist); + whitelist_apply_to_hwe(hwe); } /** diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 0be4f489d3e1..9f327f27c072 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -221,7 +221,10 @@ static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw, static u32 get_ppgtt_flag(struct xe_sched_job *job) { - return job->q->vm ? BIT(8) : 0; + if (job->q->vm && !job->ggtt) + return BIT(8); + + return 0; } static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index b13d4d62f0b1..7a1c78fdfc92 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -340,3 +340,8 @@ bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, return dss >= dss_per_gslice; } +bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + return !IS_SRIOV_VF(gt_to_xe(gt)); +} diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 827d932b6908..38b9f13bba5e 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -131,7 +131,7 @@ struct xe_reg_sr; * @ver_end__: Last graphics IP version to match * * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e. - * inclusive on boths sides + * inclusive on both sides * * Refer to XE_RTP_RULES() for expected usage. */ @@ -169,7 +169,7 @@ struct xe_reg_sr; * @ver_end__: Last media IP version to match * * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e. - * inclusive on boths sides + * inclusive on both sides * * Refer to XE_RTP_RULES() for expected usage. */ @@ -476,4 +476,15 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, const struct xe_hw_engine *hwe); +/* + * xe_rtp_match_not_sriov_vf - Match when not on SR-IOV VF device + * + * @gt: GT structure + * @hwe: Engine instance + * + * Returns: true if device is not VF, false otherwise. 
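+ *
+ * This can be attached to an RTP rule via FUNC(), e.g. (illustrative rule only):
+ * XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_not_sriov_vf))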
+ */ +bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, + const struct xe_hw_engine *hwe); + #endif diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index e055bed7ae55..f8fe61e25518 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -31,47 +31,55 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg) sa_manager->bo = NULL; } -struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align) +/** + * __xe_sa_bo_manager_init() - Create and initialize the suballocator + * @tile: the &xe_tile to allocate from + * @size: number of bytes to allocate + * @guard: number of bytes to exclude from suballocations + * @align: alignment for each suballocated chunk + * + * Prepares the suballocation manager for suballocations. + * + * Return: a pointer to the &xe_sa_manager or an ERR_PTR on failure. + */ +struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 guard, u32 align) { struct xe_device *xe = tile_to_xe(tile); - u32 managed_size = size - SZ_4K; + struct xe_sa_manager *sa_manager; + u32 managed_size; struct xe_bo *bo; int ret; - struct xe_sa_manager *sa_manager = drmm_kzalloc(&tile_to_xe(tile)->drm, - sizeof(*sa_manager), - GFP_KERNEL); + xe_tile_assert(tile, size > guard); + managed_size = size - guard; + + sa_manager = drmm_kzalloc(&xe->drm, sizeof(*sa_manager), GFP_KERNEL); if (!sa_manager) return ERR_PTR(-ENOMEM); - sa_manager->bo = NULL; - bo = xe_managed_bo_create_pin_map(xe, tile, size, XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) { - drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n", - PTR_ERR(bo)); + drm_err(&xe->drm, "Failed to prepare %uKiB BO for SA manager (%pe)\n", + size / SZ_1K, bo); return ERR_CAST(bo); } sa_manager->bo = bo; sa_manager->is_iomem = bo->vmap.is_iomem; - - drm_suballoc_manager_init(&sa_manager->base, managed_size, align); sa_manager->gpu_addr = xe_bo_ggtt_addr(bo); if (bo->vmap.is_iomem) { sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL); - if (!sa_manager->cpu_ptr) { - sa_manager->bo = NULL; + if (!sa_manager->cpu_ptr) return ERR_PTR(-ENOMEM); - } } else { sa_manager->cpu_ptr = bo->vmap.vaddr; memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size); } + drm_suballoc_manager_init(&sa_manager->base, managed_size, align); ret = drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini, sa_manager); if (ret) @@ -80,8 +88,17 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 return sa_manager; } -struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, - unsigned int size) +/** + * __xe_sa_bo_new() - Make a suballocation but use custom gfp flags. + * @sa_manager: the &xe_sa_manager + * @size: number of bytes we want to suballocate + * @gfp: gfp flags used for memory allocation. Typically GFP_KERNEL. + * + * Try to make a suballocation of size @size. + * + * Return: a &drm_suballoc, or an ERR_PTR.
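+ *
+ * Most callers can use the xe_sa_bo_new() wrapper from xe_sa.h, which simply
+ * passes GFP_KERNEL here; this variant exists for callers that need other
+ * gfp flags.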
+ */ +struct drm_suballoc *__xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size, gfp_t gfp) { /* * BB too large, return -ENOBUFS indicating user should split @@ -90,7 +107,7 @@ struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, if (size > sa_manager->base.size) return ERR_PTR(-ENOBUFS); - return drm_suballoc_new(&sa_manager->base, size, GFP_KERNEL, true, 0); + return drm_suballoc_new(&sa_manager->base, size, gfp, true, 0); } void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo) diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h index 4e96483057d7..1170ee5a81a8 100644 --- a/drivers/gpu/drm/xe/xe_sa.h +++ b/drivers/gpu/drm/xe/xe_sa.h @@ -5,19 +5,37 @@ #ifndef _XE_SA_H_ #define _XE_SA_H_ +#include +#include #include "xe_sa_types.h" struct dma_fence; -struct xe_bo; struct xe_tile; -struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align); +struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 guard, u32 align); +struct drm_suballoc *__xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size, gfp_t gfp); + +static inline struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align) +{ + return __xe_sa_bo_manager_init(tile, size, SZ_4K, align); +} + +/** + * xe_sa_bo_new() - Make a suballocation. + * @sa_manager: the &xe_sa_manager + * @size: number of bytes we want to suballocate + * + * Try to make a suballocation of size @size. + * + * Return: a &drm_suballoc, or an ERR_PTR. + */ +static inline struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size) +{ + return __xe_sa_bo_new(sa_manager, size, GFP_KERNEL); +} -struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, - u32 size); void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo); -void xe_sa_bo_free(struct drm_suballoc *sa_bo, - struct dma_fence *fence); +void xe_sa_bo_free(struct drm_suballoc *sa_bo, struct dma_fence *fence); static inline struct xe_sa_manager * to_xe_sa_manager(struct drm_suballoc_manager *mng) diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index f13f333f00be..d942b20a9f29 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -56,6 +56,8 @@ struct xe_sched_job { u32 migrate_flush_flags; /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */ bool ring_ops_flush_tlb; + /** @ggtt: mapped in ggtt. */ + bool ggtt; /** @ptrs: per instance pointers.
*/ struct xe_job_ptrs ptrs[]; }; diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index 04e2f539ccd9..a0eab44c0e76 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -81,7 +81,7 @@ void xe_sriov_probe_early(struct xe_device *xe) xe->sriov.__mode = mode; xe_assert(xe, xe->sriov.__mode); - if (has_sriov) + if (IS_SRIOV(xe)) drm_info(&xe->drm, "Running in %s mode\n", xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); } diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c new file mode 100644 index 000000000000..c619560af74f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_survivability_mode.h" +#include "xe_survivability_mode_types.h" + +#include +#include +#include + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_heci_gsc.h" +#include "xe_mmio.h" +#include "xe_pcode_api.h" +#include "xe_vsec.h" + +#define MAX_SCRATCH_MMIO 8 + +/** + * DOC: Xe Boot Survivability + * + * Boot Survivability is a software-based workflow for recovering a system in a failed boot state. + * Here, system recoverability is concerned with recovering the firmware responsible for boot. + * + * This is implemented by loading the driver with the bare minimum (no drm card) to allow the firmware + * to be flashed through mei and to collect telemetry. The driver's probe flow is modified + * such that it enters survivability mode when pcode initialization is incomplete and boot status + * denotes a failure. The driver then populates the survivability_mode PCI sysfs indicating + * survivability mode and provides additional information required for debug. + * + * KMD exposes the below admin-only readable sysfs in survivability mode: + * + * device/survivability_mode: The presence of this file indicates that the card is in survivability + * mode. It also provides additional information on why the driver entered + * survivability mode.
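+ *
+ * For example, assuming a card at PCI BDF 0000:03:00.0, an admin can read it with
+ * "cat /sys/bus/pci/devices/0000:03:00.0/survivability_mode". The reported
+ * information includes: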
+ * + * Capability Information - Provides boot status + * Postcode Information - Provides information about the failure + * Overflow Information - Provides history of previous failures + * Auxiliary Information - Certain failures may have information in + * addition to postcode information + */ + +static u32 aux_history_offset(u32 reg_value) +{ + return REG_FIELD_GET(AUXINFO_HISTORY_OFFSET, reg_value); +} + +static void set_survivability_info(struct xe_mmio *mmio, struct xe_survivability_info *info, + int id, char *name) +{ + strscpy(info[id].name, name, sizeof(info[id].name)); + info[id].reg = PCODE_SCRATCH(id).raw; + info[id].value = xe_mmio_read32(mmio, PCODE_SCRATCH(id)); +} + +static void populate_survivability_info(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + struct xe_survivability_info *info = survivability->info; + struct xe_mmio *mmio; + u32 id = 0, reg_value; + char name[NAME_MAX]; + int index; + + mmio = xe_root_tile_mmio(xe); + set_survivability_info(mmio, info, id, "Capability Info"); + reg_value = info[id].value; + + if (reg_value & HISTORY_TRACKING) { + id++; + set_survivability_info(mmio, info, id, "Postcode Info"); + + if (reg_value & OVERFLOW_SUPPORT) { + id = REG_FIELD_GET(OVERFLOW_REG_OFFSET, reg_value); + set_survivability_info(mmio, info, id, "Overflow Info"); + } + } + + if (reg_value & AUXINFO_SUPPORT) { + id = REG_FIELD_GET(AUXINFO_REG_OFFSET, reg_value); + + for (index = 0; id && reg_value; index++, reg_value = info[id].value, + id = aux_history_offset(reg_value)) { + snprintf(name, NAME_MAX, "Auxiliary Info %d", index); + set_survivability_info(mmio, info, id, name); + } + } +} + +static void log_survivability_info(struct pci_dev *pdev) +{ + struct xe_device *xe = pdev_to_xe_device(pdev); + struct xe_survivability *survivability = &xe->survivability; + struct xe_survivability_info *info = survivability->info; + int id; + + dev_info(&pdev->dev, "Survivability Boot Status : Critical Failure (%d)\n", + survivability->boot_status); + for (id = 0; id < MAX_SCRATCH_MMIO; id++) { + if (info[id].reg) + dev_info(&pdev->dev, "%s: 0x%x - 0x%x\n", info[id].name, + info[id].reg, info[id].value); + } +} + +static ssize_t survivability_mode_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + struct xe_survivability *survivability = &xe->survivability; + struct xe_survivability_info *info = survivability->info; + int index = 0, count = 0; + + for (index = 0; index < MAX_SCRATCH_MMIO; index++) { + if (info[index].reg) + count += sysfs_emit_at(buff, count, "%s: 0x%x - 0x%x\n", info[index].name, + info[index].reg, info[index].value); + } + + return count; +} + +static DEVICE_ATTR_ADMIN_RO(survivability_mode); + +static void enable_survivability_mode(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct xe_device *xe = pdev_to_xe_device(pdev); + struct xe_survivability *survivability = &xe->survivability; + int ret = 0; + + /* set survivability mode */ + survivability->mode = true; + dev_info(dev, "In Survivability Mode\n"); + + /* create survivability mode sysfs */ + ret = sysfs_create_file(&dev->kobj, &dev_attr_survivability_mode.attr); + if (ret) { + dev_warn(dev, "Failed to create survivability sysfs files\n"); + return; + } + + xe_heci_gsc_init(xe); + + xe_vsec_init(xe); +} + +/** + * xe_survivability_mode_enabled - check if survivability mode is enabled + * @xe: xe device instance + * + * Returns true if 
in survivability mode, false otherwise + */ +bool xe_survivability_mode_enabled(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + + return survivability->mode; +} + +/** + * xe_survivability_mode_required - checks if survivability mode is required + * @xe: xe device instance + * + * This function reads the boot status from Pcode + * + * Return: true if boot status indicates failure, false otherwise + */ +bool xe_survivability_mode_required(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + u32 data; + + if (!IS_DGFX(xe) || xe->info.platform < XE_BATTLEMAGE) + return false; + + data = xe_mmio_read32(mmio, PCODE_SCRATCH(0)); + survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data); + + return (survivability->boot_status == NON_CRITICAL_FAILURE || + survivability->boot_status == CRITICAL_FAILURE); +} + +/** + * xe_survivability_mode_remove - remove survivability mode + * @xe: xe device instance + * + * clean up sysfs entries of survivability mode + */ +void xe_survivability_mode_remove(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct device *dev = &pdev->dev; + + sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr); + xe_heci_gsc_fini(xe); + kfree(survivability->info); + pci_set_drvdata(pdev, NULL); +} + +/** + * xe_survivability_mode_init - Initialize the survivability mode + * @xe: xe device instance + * + * Initializes survivability information and enables survivability mode + */ +void xe_survivability_mode_init(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + struct xe_survivability_info *info; + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + + survivability->size = MAX_SCRATCH_MMIO; + + info = kcalloc(survivability->size, sizeof(*info), GFP_KERNEL); + if (!info) + return; + + survivability->info = info; + + populate_survivability_info(xe); + + /* Only log debug information and exit if it is a critical failure */ + if (survivability->boot_status == CRITICAL_FAILURE) { + log_survivability_info(pdev); + kfree(survivability->info); + return; + } + + enable_survivability_mode(pdev); +} diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.h b/drivers/gpu/drm/xe/xe_survivability_mode.h new file mode 100644 index 000000000000..f530507a22c6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_survivability_mode.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SURVIVABILITY_MODE_H_ +#define _XE_SURVIVABILITY_MODE_H_ + +#include + +struct xe_device; + +void xe_survivability_mode_init(struct xe_device *xe); +void xe_survivability_mode_remove(struct xe_device *xe); +bool xe_survivability_mode_enabled(struct xe_device *xe); +bool xe_survivability_mode_required(struct xe_device *xe); + +#endif /* _XE_SURVIVABILITY_MODE_H_ */ diff --git a/drivers/gpu/drm/xe/xe_survivability_mode_types.h b/drivers/gpu/drm/xe/xe_survivability_mode_types.h new file mode 100644 index 000000000000..19d433e253df --- /dev/null +++ b/drivers/gpu/drm/xe/xe_survivability_mode_types.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SURVIVABILITY_MODE_TYPES_H_ +#define _XE_SURVIVABILITY_MODE_TYPES_H_ + +#include +#include + +struct xe_survivability_info { + char name[NAME_MAX]; + u32 reg; + u32 value; +}; + +/** + * struct 
xe_survivability: Contains survivability mode information + */ +struct xe_survivability { + /** @info: struct that holds survivability info from scratch registers */ + struct xe_survivability_info *info; + + /** @size: number of scratch registers */ + u32 size; + + /** @boot_status: indicates critical/non critical boot failure */ + u8 boot_status; + + /** @mode: boolean to indicate survivability mode */ + bool mode; +}; + +#endif /* _XE_SURVIVABILITY_MODE_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 07cf7cfe4abd..2825553b568f 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -170,10 +170,6 @@ int xe_tile_init_noalloc(struct xe_tile *tile) if (err) return err; - tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); - if (IS_ERR(tile->mem.kernel_bb_pool)) - return PTR_ERR(tile->mem.kernel_bb_pool); - xe_wa_apply_tile_workarounds(tile); err = xe_tile_sysfs_init(tile); @@ -181,6 +177,14 @@ int xe_tile_init_noalloc(struct xe_tile *tile) return 0; } +int xe_tile_init(struct xe_tile *tile) +{ + tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); + if (IS_ERR(tile->mem.kernel_bb_pool)) + return PTR_ERR(tile->mem.kernel_bb_pool); + + return 0; +} void xe_tile_migrate_wait(struct xe_tile *tile) { xe_migrate_wait(tile->migrate); diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index 1c9e42ade6b0..eb939316d55b 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -12,6 +12,7 @@ struct xe_tile; int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id); int xe_tile_init_noalloc(struct xe_tile *tile); +int xe_tile_init(struct xe_tile *tile); void xe_tile_migrate_wait(struct xe_tile *tile); diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h index 1762dd30ba6d..ccebd5f0878e 100644 --- a/drivers/gpu/drm/xe/xe_trace_bo.h +++ b/drivers/gpu/drm/xe/xe_trace_bo.h @@ -53,6 +53,11 @@ DEFINE_EVENT(xe_bo, xe_bo_validate, TP_ARGS(bo) ); +DEFINE_EVENT(xe_bo, xe_bo_create, + TP_PROTO(struct xe_bo *bo), + TP_ARGS(bo) +); + TRACE_EVENT(xe_bo_move, TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement, bool move_lacks_source), @@ -60,8 +65,8 @@ TRACE_EVENT(xe_bo_move, TP_STRUCT__entry( __field(struct xe_bo *, bo) __field(size_t, size) - __field(u32, new_placement) - __field(u32, old_placement) + __string(new_placement_name, xe_mem_type_to_name[new_placement]) + __string(old_placement_name, xe_mem_type_to_name[old_placement]) __string(device_id, __dev_name_bo(bo)) __field(bool, move_lacks_source) ), @@ -69,15 +74,15 @@ TRACE_EVENT(xe_bo_move, TP_fast_assign( __entry->bo = bo; __entry->size = bo->size; - __entry->new_placement = new_placement; - __entry->old_placement = old_placement; + __assign_str(new_placement_name); + __assign_str(old_placement_name); __assign_str(device_id); __entry->move_lacks_source = move_lacks_source; ), TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s", __entry->move_lacks_source ? 
"yes" : "no", __entry->bo, __entry->size, - xe_mem_type_to_name[__entry->old_placement], - xe_mem_type_to_name[__entry->new_placement], __get_str(device_id)) + __get_str(old_placement_name), + __get_str(new_placement_name), __get_str(device_id)) ); DECLARE_EVENT_CLASS(xe_vma, @@ -87,6 +92,7 @@ DECLARE_EVENT_CLASS(xe_vma, TP_STRUCT__entry( __string(dev, __dev_name_vma(vma)) __field(struct xe_vma *, vma) + __field(struct xe_vm *, vm) __field(u32, asid) __field(u64, start) __field(u64, end) @@ -96,14 +102,16 @@ DECLARE_EVENT_CLASS(xe_vma, TP_fast_assign( __assign_str(dev); __entry->vma = vma; + __entry->vm = xe_vma_vm(vma); __entry->asid = xe_vma_vm(vma)->usm.asid; __entry->start = xe_vma_start(vma); __entry->end = xe_vma_end(vma) - 1; __entry->ptr = xe_vma_userptr(vma); ), - TP_printk("dev=%s, vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,", - __get_str(dev), __entry->vma, __entry->asid, __entry->start, + TP_printk("dev=%s, vma=%p, vm=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx", + __get_str(dev), __entry->vma, __entry->vm, + __entry->asid, __entry->start, __entry->end, __entry->ptr) ) @@ -185,16 +193,19 @@ DECLARE_EVENT_CLASS(xe_vm, __string(dev, __dev_name_vm(vm)) __field(struct xe_vm *, vm) __field(u32, asid) + __field(u32, flags) ), TP_fast_assign( __assign_str(dev); __entry->vm = vm; __entry->asid = vm->usm.asid; + __entry->flags = vm->flags; ), - TP_printk("dev=%s, vm=%p, asid=0x%05x", __get_str(dev), - __entry->vm, __entry->asid) + TP_printk("dev=%s, vm=%p, asid=0x%05x, vm flags=0x%05x", + __get_str(dev), __entry->vm, __entry->asid, + __entry->flags) ); DEFINE_EVENT(xe_vm, xe_vm_kill, diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h index 0d8caa0e7354..ad3b35a0e6eb 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -92,7 +92,7 @@ struct xe_uc_fw { const enum xe_uc_fw_status status; /** * @__status: private firmware load status - only to be used - * by firmware laoding code + * by firmware loading code */ enum xe_uc_fw_status __status; }; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index b4a44e1ea167..e86c4cc47884 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -1024,7 +1025,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma) /* * Since userptr pages are not pinned, we can't remove - * the notifer until we're sure the GPU is not accessing + * the notifier until we're sure the GPU is not accessing * them anymore */ mmu_interval_notifier_remove(&userptr->notifier); @@ -2107,7 +2108,7 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) } } - /* Adjust for partial unbind after removin VMA from VM */ + /* Adjust for partial unbind after removing VMA from VM */ if (!err) { op->base.remap.unmap->va->va.addr = op->remap.start; op->base.remap.unmap->va->va.range = op->remap.range; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 23adb7442881..c864dba35e1d 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -17,6 +17,7 @@ struct drm_printer; struct drm_file; struct ttm_buffer_object; +struct ttm_validate_buffer; struct xe_exec_queue; struct xe_file; diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h index 078786958403..1030ce214032 100644 --- a/drivers/gpu/drm/xe/xe_vm_doc.h +++ b/drivers/gpu/drm/xe/xe_vm_doc.h @@ -431,7 +431,7 @@ * bind path also 
acquires this lock in write while the exec / compute mode * rebind worker acquires this lock in read mode. * - * VM dma-resv lock (vm->ttm.base.resv->lock) - WW lock. Protects VM dma-resv + * VM dma-resv lock (vm->gpuvm.r_obj->resv->lock) - WW lock. Protects VM dma-resv * slots which is shared with any private BO in the VM. Expected to be acquired * during VM binds, execs, and compute mode rebind worker. This lock is also * held when private BOs are being evicted. diff --git a/drivers/gpu/drm/xe/xe_vsec.c b/drivers/gpu/drm/xe/xe_vsec.c new file mode 100644 index 000000000000..edaec262ed0a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vsec.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright © 2024 Intel Corporation */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_drv.h" +#include "xe_mmio.h" +#include "xe_platform_types.h" +#include "xe_pm.h" +#include "xe_vsec.h" + +#include "regs/xe_pmt.h" + +/* PMT GUID value for BMG devices. NOTE: this is NOT a PCI id */ +#define BMG_DEVICE_ID 0xE2F8 + +static struct intel_vsec_header bmg_telemetry = { + .length = 0x10, + .id = VSEC_ID_TELEMETRY, + .num_entries = 2, + .entry_size = 4, + .tbir = 0, + .offset = BMG_DISCOVERY_OFFSET, +}; + +static struct intel_vsec_header bmg_punit_crashlog = { + .length = 0x10, + .id = VSEC_ID_CRASHLOG, + .num_entries = 1, + .entry_size = 4, + .tbir = 0, + .offset = BMG_DISCOVERY_OFFSET + 0x60, +}; + +static struct intel_vsec_header bmg_oobmsm_crashlog = { + .length = 0x10, + .id = VSEC_ID_CRASHLOG, + .num_entries = 1, + .entry_size = 4, + .tbir = 0, + .offset = BMG_DISCOVERY_OFFSET + 0x78, +}; + +static struct intel_vsec_header *bmg_capabilities[] = { + &bmg_telemetry, + &bmg_punit_crashlog, + &bmg_oobmsm_crashlog, + NULL +}; + +enum xe_vsec { + XE_VSEC_UNKNOWN = 0, + XE_VSEC_BMG, +}; + +static struct intel_vsec_platform_info xe_vsec_info[] = { + [XE_VSEC_BMG] = { + .caps = VSEC_CAP_TELEMETRY | VSEC_CAP_CRASHLOG, + .headers = bmg_capabilities, + }, + { } +}; + +/* + * The GUID will have the following bits to decode: + * [0:3] - {Telemetry space iteration number (0,1,..)} + * [4:7] - Segment (SEGMENT_INDEPENDENT-0, Client-1, Server-2) + * [8:11] - SOC_SKU + * [12:27] – Device ID – changes for each down bin SKU’s + * [28:29] - Capability Type (Crashlog-0, Telemetry Aggregator-1, Watcher-2) + * [30:31] - Record-ID (0-PUNIT, 1-OOBMSM_0, 2-OOBMSM_1) + */ +#define GUID_TELEM_ITERATION GENMASK(3, 0) +#define GUID_SEGMENT GENMASK(7, 4) +#define GUID_SOC_SKU GENMASK(11, 8) +#define GUID_DEVICE_ID GENMASK(27, 12) +#define GUID_CAP_TYPE GENMASK(29, 28) +#define GUID_RECORD_ID GENMASK(31, 30) + +#define PUNIT_TELEMETRY_OFFSET 0x0200 +#define PUNIT_WATCHER_OFFSET 0x14A0 +#define OOBMSM_0_WATCHER_OFFSET 0x18D8 +#define OOBMSM_1_TELEMETRY_OFFSET 0x1000 + +enum record_id { + PUNIT, + OOBMSM_0, + OOBMSM_1, +}; + +enum capability { + CRASHLOG, + TELEMETRY, + WATCHER, +}; + +static int xe_guid_decode(u32 guid, int *index, u32 *offset) +{ + u32 record_id = FIELD_GET(GUID_RECORD_ID, guid); + u32 cap_type = FIELD_GET(GUID_CAP_TYPE, guid); + u32 device_id = FIELD_GET(GUID_DEVICE_ID, guid); + + if (device_id != BMG_DEVICE_ID) + return -ENODEV; + + if (cap_type > WATCHER) + return -EINVAL; + + *offset = 0; + + if (cap_type == CRASHLOG) { + *index = record_id == PUNIT ? 
2 : 4; + return 0; + } + + switch (record_id) { + case PUNIT: + *index = 0; + if (cap_type == TELEMETRY) + *offset = PUNIT_TELEMETRY_OFFSET; + else + *offset = PUNIT_WATCHER_OFFSET; + break; + + case OOBMSM_0: + *index = 1; + if (cap_type == WATCHER) + *offset = OOBMSM_0_WATCHER_OFFSET; + break; + + case OOBMSM_1: + *index = 1; + if (cap_type == TELEMETRY) + *offset = OOBMSM_1_TELEMETRY_OFFSET; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, + u32 count) +{ + struct xe_device *xe = pdev_to_xe_device(pdev); + void __iomem *telem_addr = xe->mmio.regs + BMG_TELEMETRY_OFFSET; + u32 mem_region; + u32 offset; + int ret; + + ret = xe_guid_decode(guid, &mem_region, &offset); + if (ret) + return ret; + + telem_addr += offset + user_offset; + + guard(mutex)(&xe->pmt.lock); + + /* indicate that we are not at an appropriate power level */ + if (!xe_pm_runtime_get_if_active(xe)) + return -ENODATA; + + /* set SoC re-mapper index register based on GUID memory region */ + xe_mmio_rmw32(xe_root_tile_mmio(xe), SG_REMAP_INDEX1, SG_REMAP_BITS, + REG_FIELD_PREP(SG_REMAP_BITS, mem_region)); + + memcpy_fromio(data, telem_addr, count); + xe_pm_runtime_put(xe); + + return count; +} + +static struct pmt_callbacks xe_pmt_cb = { + .read_telem = xe_pmt_telem_read, +}; + +static const int vsec_platforms[] = { + [XE_BATTLEMAGE] = XE_VSEC_BMG, +}; + +static enum xe_vsec get_platform_info(struct xe_device *xe) +{ + if (xe->info.platform > XE_BATTLEMAGE) + return XE_VSEC_UNKNOWN; + + return vsec_platforms[xe->info.platform]; +} + +/** + * xe_vsec_init - Initialize resources and add intel_vsec auxiliary + * interface + * @xe: valid xe instance + */ +void xe_vsec_init(struct xe_device *xe) +{ + struct intel_vsec_platform_info *info; + struct device *dev = xe->drm.dev; + struct pci_dev *pdev = to_pci_dev(dev); + enum xe_vsec platform; + + platform = get_platform_info(xe); + if (platform == XE_VSEC_UNKNOWN) + return; + + info = &xe_vsec_info[platform]; + if (!info->headers) + return; + + switch (platform) { + case XE_VSEC_BMG: + info->priv_data = &xe_pmt_cb; + break; + default: + break; + } + + /* + * Register a VSEC. Cleanup is handled using device managed + * resources. 
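+	 * The headers registered here are consumed by the intel_vsec auxiliary
+	 * bus drivers (e.g. intel_pmt telemetry and crashlog).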
+ */ + intel_vsec_register(pdev, info); +} +MODULE_IMPORT_NS(INTEL_VSEC); diff --git a/drivers/gpu/drm/xe/xe_vsec.h b/drivers/gpu/drm/xe/xe_vsec.h new file mode 100644 index 000000000000..5777c53faec2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vsec.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright © 2024 Intel Corporation */ + +#ifndef _XE_VSEC_H_ +#define _XE_VSEC_H_ + +struct xe_device; + +void xe_vsec_init(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 570fe0376402..744dba4fdb58 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -613,6 +613,11 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(FIELD_SET(SAMPLER_MODE, SMP_WAIT_FETCH_MERGING_COUNTER, SMP_FORCE_128B_OVERFETCH)) }, + { XE_RTP_NAME("14023061436"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_CHICKEN, QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE)) + }, {} }; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 4a8a4a63e99c..e2160330ad01 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -811,6 +811,32 @@ struct drm_xe_gem_create { /** * struct drm_xe_gem_mmap_offset - Input of &DRM_IOCTL_XE_GEM_MMAP_OFFSET + * + * The @flags can be: + * - %DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER - For user to query special offset + * for use in mmap ioctl. Writing to the returned mmap address will generate a + * PCI memory barrier with low overhead (avoiding IOCTL call as well as writing + * to VRAM which would also add overhead), acting like an MI_MEM_FENCE + * instruction. + * + * Note: The mmap size can be at most 4K, due to HW limitations. As a result + * this interface is only supported on CPU architectures that support 4K page + * size. The mmap_offset ioctl will detect this and gracefully return an + * error, where userspace is expected to have a different fallback method for + * triggering a barrier. + * + * Roughly the usage would be as follows: + * + * .. code-block:: C + * + * struct drm_xe_gem_mmap_offset mmo = { + * .handle = 0, // must be set to 0 + * .flags = DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER, + * }; + * + * err = ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo); + * map = mmap(NULL, size, PROT_WRITE, MAP_SHARED, fd, mmo.offset); + * map[i] = 0xdeadbeaf; // issue barrier */ struct drm_xe_gem_mmap_offset { /** @extensions: Pointer to the first extension struct, if any */ @@ -819,7 +845,8 @@ struct drm_xe_gem_mmap_offset { /** @handle: Handle for the object being mapped. */ __u32 handle; - /** @flags: Must be zero */ +#define DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER (1 << 0) + /** @flags: Flags */ __u32 flags; /** @offset: The fake offset to use for subsequent mmap call */ @@ -1486,6 +1513,8 @@ struct drm_xe_oa_unit { __u64 capabilities; #define DRM_XE_OA_CAPS_BASE (1 << 0) #define DRM_XE_OA_CAPS_SYNCS (1 << 1) +#define DRM_XE_OA_CAPS_OA_BUFFER_SIZE (1 << 2) +#define DRM_XE_OA_CAPS_WAIT_NUM_REPORTS (1 << 3) /** @oa_timestamp_freq: OA timestamp freq */ __u64 oa_timestamp_freq; @@ -1651,6 +1680,20 @@ enum drm_xe_oa_property_id { * to the VM bind case. */ DRM_XE_OA_PROPERTY_SYNCS, + + /** + * @DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE: Size of OA buffer to be + * allocated by the driver in bytes. Supported sizes are powers of + * 2 from 128 KiB to 128 MiB. When not specified, a 16 MiB OA + * buffer is allocated by default. 
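+	 *
+	 * For example, a value of 8388608 (8 MiB) here selects an 8 MiB OA
+	 * buffer; values in that range which are not powers of 2 are invalid.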
+ */ + DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE, + + /** + * @DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS: Number of reports to wait + * for before unblocking poll or read + */ + DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS, }; /**