From 2ec4bb1dd2a868c7b39823b0698c1dfe1459e7d1 Mon Sep 17 00:00:00 2001
From: Andy Leiserson <aleiserson@mozilla.com>
Date: Thu, 8 Jan 2026 11:33:06 -0800
Subject: [PATCH 1/2] Round max_vertex_buffer_array_stride down to a multiple
 of 4

Fixes #8846

Note that the CTS test in the issue is already enabled
(`webgpu:api,operation,vertex_state,correctness:non_zero_array_stride_and_attribute_offset:*`).
It seems the failure only appears on specific hardware/drivers.
---
 wgpu-hal/src/auxil/mod.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/wgpu-hal/src/auxil/mod.rs b/wgpu-hal/src/auxil/mod.rs
index 8692b273c11..a80ac039e23 100644
--- a/wgpu-hal/src/auxil/mod.rs
+++ b/wgpu-hal/src/auxil/mod.rs
@@ -136,6 +136,7 @@ pub(crate) fn apply_hal_limits(mut limits: wgt::Limits) -> wgt::Limits {
     // it may duplicate some of these limits.
     limits.max_bind_groups = limits.max_bind_groups.min(crate::MAX_BIND_GROUPS as u32);
     limits.max_storage_buffer_binding_size &= !(wgt::STORAGE_BINDING_SIZE_ALIGNMENT - 1);
+    limits.max_vertex_buffer_array_stride &= !(wgt::VERTEX_ALIGNMENT as u32 - 1);
     limits.max_vertex_buffers = limits
         .max_vertex_buffers
         .min(crate::MAX_VERTEX_BUFFERS as u32);

From 81255ad7230c33dd5a40ed06f8d54f98005a5c08 Mon Sep 17 00:00:00 2001
From: Andy Leiserson <aleiserson@mozilla.com>
Date: Wed, 14 Jan 2026 11:08:00 -0800
Subject: [PATCH 2/2] Add and revise comments

---
 wgpu-hal/src/auxil/mod.rs     | 22 ++++++++++++++++------
 wgpu-hal/src/metal/adapter.rs | 11 ++++++++---
 2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/wgpu-hal/src/auxil/mod.rs b/wgpu-hal/src/auxil/mod.rs
index a80ac039e23..39054da1879 100644
--- a/wgpu-hal/src/auxil/mod.rs
+++ b/wgpu-hal/src/auxil/mod.rs
@@ -127,21 +127,31 @@ impl crate::TextureCopy {
     }
 }
 
-/// Clamp the limits in `limits` to honor any HAL-imposed maximums.
+/// Clamp the limits in `limits` to honor HAL-imposed maximums and WebGPU
+/// alignment requirements.
 ///
-/// Limits that do not have a HAL-defined maximum are left unchanged.
+/// Other limits are left unchanged.
 #[cfg_attr(not(any_backend), allow(dead_code))]
 pub(crate) fn apply_hal_limits(mut limits: wgt::Limits) -> wgt::Limits {
-    // The Metal backend wants to have its own consistent view of the limits, so
-    // it may duplicate some of these limits.
+    // The Metal backend maintains two copies of many limit values (one as
+    // `wgt::Limits` and one as `metal::PrivateCapabilities`). In order to avoid
+    // confusing discrepancies between the two, some of the logic here is
+    // duplicated in the initialization of `metal::PrivateCapabilities`.
+    // See <https://github.com/gfx-rs/wgpu/issues/8715>.
+
     limits.max_bind_groups = limits.max_bind_groups.min(crate::MAX_BIND_GROUPS as u32);
-    limits.max_storage_buffer_binding_size &= !(wgt::STORAGE_BINDING_SIZE_ALIGNMENT - 1);
-    limits.max_vertex_buffer_array_stride &= !(wgt::VERTEX_ALIGNMENT as u32 - 1);
     limits.max_vertex_buffers = limits
         .max_vertex_buffers
         .min(crate::MAX_VERTEX_BUFFERS as u32);
     limits.max_color_attachments = limits
         .max_color_attachments
         .min(crate::MAX_COLOR_ATTACHMENTS as u32);
+
+    // Round some limits down to the WebGPU alignment requirement, to avoid
+    // suggesting values that won't work. (In particular, the CTS queries limits
+    // and then tests the exact limit value.)
+    limits.max_storage_buffer_binding_size &= !(wgt::STORAGE_BINDING_SIZE_ALIGNMENT - 1);
+    limits.max_vertex_buffer_array_stride &= !(wgt::VERTEX_ALIGNMENT as u32 - 1);
+
     limits
 }
diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs
index fed8bab1685..03f5b382889 100644
--- a/wgpu-hal/src/metal/adapter.rs
+++ b/wgpu-hal/src/metal/adapter.rs
@@ -619,6 +619,11 @@ impl super::PrivateCapabilities {
             MTLLanguageVersion::V1_0
         };
 
+        // The `PrivateCapabilities` we are constructing here duplicates many of the limits
+        // in `wgt::Limits`, creating a risk of confusion if the limits are adjusted between
+        // the initialization here and the final `wgt::Limits` values. To reduce this risk,
+        // some of the calculations here duplicate logic in `auxil::apply_hal_limits`.
+        // See <https://github.com/gfx-rs/wgpu/issues/8715>.
         Self {
             msl_version,
             // macOS 10.11 doesn't support read-write resources
@@ -723,7 +728,7 @@ impl super::PrivateCapabilities {
             format_bgr10a2_all: Self::supports_any(device, BGR10A2_ALL),
             format_bgr10a2_no_write: !Self::supports_any(device, BGR10A2_ALL),
             max_buffers_per_stage: 31,
-            max_vertex_buffers: 31.min(crate::MAX_VERTEX_BUFFERS as u32),
+            max_vertex_buffers: 31.min(crate::MAX_VERTEX_BUFFERS as u32), // duplicative of `apply_hal_limits`
             max_textures_per_stage: if os_type == super::OsType::Macos
                 || (family_check && device.supports_family(MTLGPUFamily::Apple6))
             {
@@ -1162,7 +1167,7 @@ impl super::PrivateCapabilities {
         let base = wgt::Limits::default();
         // Be careful adjusting limits here. The `AdapterShared` stores the
         // original `PrivateCapabilities`, so code could accidentally use
-        // the wrong value.
+        // the wrong value. See <https://github.com/gfx-rs/wgpu/issues/8715>.
 
         let limits = wgt::Limits {
             max_texture_dimension_1d: self.max_texture_size as u32,
@@ -1241,7 +1246,7 @@ impl super::PrivateCapabilities {
         // Since a bunch of the limits are duplicated between `Limits` and
         // `PrivateCapabilities`, reducing the limits at this point could make
         // things inconsistent and lead to confusion. Make sure that doesn't
-        // happen.
+        // happen. See <https://github.com/gfx-rs/wgpu/issues/8715>.
         debug_assert!(
             crate::auxil::apply_hal_limits(limits.clone()) == limits,
             "Limits were modified by apply_hal_limits\nOriginal:\n{:#?}\nModified:\n{:#?}",