Skip to content

Commit 8809b4b

Browse files
YiPeng Chai authored and Hawking Zhang committed
amd/amdgpu: Reduce unnecessary repetitive GPU resets
In the multi-GPU case, once one GPU has started resetting all GPUs in the hive, the other GPUs do not need to trigger a GPU reset again.
Signed-off-by: YiPeng Chai <[email protected]>
Reviewed-by: Hawking Zhang <[email protected]>
1 parent 2f767b9 commit 8809b4b

File tree

1 file changed

+20
-1
lines changed

1 file changed

+20
-1
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4030,8 +4030,27 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
40304030
ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
40314031
}
40324032

4033-
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
4033+
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) {
4034+
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
4035+
int hive_ras_recovery = 0;
4036+
4037+
if (hive) {
4038+
hive_ras_recovery = atomic_read(&hive->ras_recovery);
4039+
amdgpu_put_xgmi_hive(hive);
4040+
}
4041+
/* In the case of multiple GPUs, after a GPU has started
4042+
* resetting all GPUs on hive, other GPUs do not need to
4043+
* trigger GPU reset again.
4044+
*/
4045+
if (!hive_ras_recovery)
4046+
amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
4047+
else
4048+
atomic_set(&ras->in_recovery, 0);
4049+
} else {
4050+
flush_work(&ras->recovery_work);
40344051
amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
4052+
}
4053+
40354054
return 0;
40364055
}
40374056

0 commit comments

Comments
 (0)