Bug 1700434 - Add SWGL fast-path for ps_text_run. r=jrmuizel

lsalzman · lsalzman · commit fcc049c5fdf4 · 2021-05-21T21:44:55.000Z
This adds swgl_commitTextureLinearR8ToRGBA8 variations so that we can deal with alpha glyph formats. Following that, a simple span shader is added to ps_text_run that dispatches to them as appropriate. Differential Revision: https://phabricator.services.mozilla.com/D115551 [ghsync] From https://hg.mozilla.org/mozilla-central/rev/e53af9e245db6f90ebf083c66456a12ee99832cb
diff --git a/glsl-to-cxx/src/hir.rs b/glsl-to-cxx/src/hir.rs
@@ -3976,6 +3976,13 @@ pub fn ast_to_hir(state: &mut State, tu: &syntax::TranslationUnit) -> Translatio
             Type::new(Void),
             vec![Type::new(*s), Type::new(Vec2), Type::new(Vec4)],
         );
+        declare_function(
+            state,
+            "swgl_commitTextureLinearR8ToRGBA8",
+            None,
+            Type::new(Void),
+            vec![Type::new(*s), Type::new(Vec2), Type::new(Vec4)],
+        );
         declare_function(
             state,
             "swgl_commitPartialTextureLinearR8",
@@ -4011,6 +4018,13 @@ pub fn ast_to_hir(state: &mut State, tu: &syntax::TranslationUnit) -> Translatio
             Type::new(Void),
             vec![Type::new(*s), Type::new(Vec2), Type::new(Vec4), Type::new(Float)],
         );
+        declare_function(
+            state,
+            "swgl_commitTextureLinearColorR8ToRGBA8",
+            None,
+            Type::new(Void),
+            vec![Type::new(*s), Type::new(Vec2), Type::new(Vec4), Type::new(Vec4)],
+        );
 
         declare_function(
             state,
diff --git a/swgl/README.md b/swgl/README.md
@@ -153,9 +153,11 @@ those span boundary pixels to estimate the coverage based on edge slope.
 ```
 void swgl_commitTextureLinearRGBA8(sampler, vec2 uv, vec4 uv_bounds);
 void swgl_commitTextureLinearR8(sampler, vec2 uv, vec4 uv_bounds);
+void swgl_commitTextureLinearR8ToRGBA8(sampler, vec2 uv, vec4 uv_bounds);
 
 void swgl_commitTextureLinearColorRGBA8(sampler, vec2 uv, vec4 uv_bounds, vec4|float color);
 void swgl_commitTextureLinearColorR8(sampler, vec2 uv, vec4 uv_bounds, vec4|float color);
+void swgl_commitTextureLinearColorR8ToRGBA8(sampler, vec2 uv, vec4 uv_bounds, vec4 color);
 
 void swgl_commitTextureLinearRepeatRGBA8(sampler, vec2 uv, vec2 tile_repeat, vec4 uv_repeat, vec4 uv_bounds);
 void swgl_commitTextureLinearRepeatColorRGBA8(sampler, vec2 uv, vec2 tile_repeat, vec4 uv_repeat, vec4 uv_bounds, vec4|float color);
@@ -181,7 +183,9 @@ within the supplied uv bounds. The color variations also accept a supplied color
 that modulates the result.
 
 The RGBA8 versions may only be used to commit within `swgl_drawSpanRGBA8`, and
-the R8 versions may only be used to commit within `swgl_drawSpanR8`.
+the R8 versions may only be used to commit within `swgl_drawSpanR8`. The R8ToRGBA8
+versions may be used to sample from an R8 source while committing to an RGBA8
+framebuffer.
 
 The Linear variations use a linear filter that bilinearly interpolates between
 the four samples near the pixel. The Nearest variations use a nearest filter
diff --git a/swgl/src/blend.h b/swgl/src/blend.h
@@ -155,21 +155,16 @@ static ALWAYS_INLINE P applyColor(P src, InvertColor) {
 
 template <typename P>
 static ALWAYS_INLINE P applyColor(P src, P color) {
-  return muldiv256(src, color);
+  return muldiv255(color, src);
 }
 
 static ALWAYS_INLINE WideRGBA8 applyColor(PackedRGBA8 src, WideRGBA8 color) {
-  return muldiv256(unpack(src), color);
+  return applyColor(unpack(src), color);
 }
 
-// Packs a color on a scale of 0..256 rather than 0..255 to allow faster scale
-// math with muldiv256. Note that this can cause a slight rounding difference in
-// the result versus the 255 scale. To alleviate this we scale by 256.49, so
-// that the color rounds slightly up and in turn causes the the value it scales
-// to round slightly up as well.
 template <typename P, typename C>
 static ALWAYS_INLINE auto packColor(P* buf, C color) {
-  return pack_span(buf, color, 256.49f);
+  return pack_span(buf, color, 255.0f);
 }
 
 template <typename P>
@@ -347,11 +342,10 @@ static void* swgl_SpanBuf = nullptr;
 // A pointer into the clip mask for the start of the span.
 static uint8_t* swgl_ClipMaskBuf = nullptr;
 
-static ALWAYS_INLINE WideR8 expand_clip_mask(UNUSED uint8_t* buf, WideR8 mask) {
+static ALWAYS_INLINE WideR8 expand_mask(UNUSED uint8_t* buf, WideR8 mask) {
   return mask;
 }
-static ALWAYS_INLINE WideRGBA8 expand_clip_mask(UNUSED uint32_t* buf,
-                                                WideR8 mask) {
+static ALWAYS_INLINE WideRGBA8 expand_mask(UNUSED uint32_t* buf, WideR8 mask) {
   WideRG8 maskRG = zip(mask, mask);
   return zip(maskRG, maskRG);
 }
@@ -367,9 +361,9 @@ static ALWAYS_INLINE uint8_t* get_clip_mask(P* buf) {
 
 template <typename P>
 static ALWAYS_INLINE auto load_clip_mask(P* buf, int span)
-    -> decltype(expand_clip_mask(buf, 0)) {
-  return expand_clip_mask(
-      buf, unpack(load_span<PackedR8>(get_clip_mask(buf), span)));
+    -> decltype(expand_mask(buf, 0)) {
+  return expand_mask(buf,
+                     unpack(load_span<PackedR8>(get_clip_mask(buf), span)));
 }
 
 // Temporarily removes masking from the blend stage, assuming the caller will
diff --git a/swgl/src/swgl_ext.h b/swgl/src/swgl_ext.h
@@ -14,10 +14,10 @@ static void commit_masked_solid_span(P* buf, C color, int len) {
   for (P* end = &buf[len]; buf < end; buf += 4, mask += 4) {
     commit_span(
         buf,
-        blend_span(buf,
-                   applyColor(expand_clip_mask(
-                                  buf, unpack(unaligned_load<PackedR8>(mask))),
-                              color)));
+        blend_span(
+            buf,
+            applyColor(expand_mask(buf, unpack(unaligned_load<PackedR8>(mask))),
+                       color)));
   }
   restore_clip_mask();
 }
@@ -614,6 +614,44 @@ static inline LinearFilter needsTextureLinear(S sampler, T P, int span) {
 #define swgl_commitTextureLinearColorR8(s, p, uv_rect, color) \
   swgl_commitTextureLinear(R8, s, p, uv_rect, color, swgl_SpanLength)
 
+// Samples an R8 texture with linear filtering, expands each sample to buf's
+// pixel format, applies color, and commits it. Returns span, or 0 if not R8.
+template <bool BLEND, typename S, typename C, typename P>
+static inline int blendTextureLinearR8(S sampler, vec2 uv, int span,
+                                       const vec4_scalar& uv_rect, C color,
+                                       P* buf) {
+  if (!swgl_isTextureR8(sampler)) {
+    return 0;  // Not an R8 source; nothing was committed.
+  }
+  LINEAR_QUANTIZE_UV(sampler, uv, uv_step, uv_rect, min_uv, max_uv);
+  for (P* end = buf + span; buf < end; buf += swgl_StepSize, uv += uv_step) {
+    commit_blend_span<BLEND>(
+        buf, applyColor(expand_mask(buf, textureLinearUnpackedR8(
+                                             sampler,
+                                             ivec2(clamp(uv, min_uv, max_uv)))),
+                        color));
+  }
+  return span;  // Report the whole span as drawn.
+}
+
+// Commit an entire span with linear filtering while expanding from R8 to RGBA8
+#define swgl_commitTextureLinearColorR8ToRGBA8(s, p, uv_rect, color)      \
+  do {                                                                    \
+    auto packed_color = packColor(swgl_OutRGBA8, color);                  \
+    int drawn = 0;                                                        \
+    if (blend_key) {                                                      \
+      drawn = blendTextureLinearR8<true>(s, p, swgl_SpanLength, uv_rect,  \
+                                         packed_color, swgl_OutRGBA8);    \
+    } else {                                                              \
+      drawn = blendTextureLinearR8<false>(s, p, swgl_SpanLength, uv_rect, \
+                                          packed_color, swgl_OutRGBA8);   \
+    }                                                                     \
+    swgl_OutRGBA8 += drawn;                                               \
+    swgl_SpanLength -= drawn;                                             \
+  } while (0)
+#define swgl_commitTextureLinearR8ToRGBA8(s, p, uv_rect) \
+  swgl_commitTextureLinearColorR8ToRGBA8(s, p, uv_rect, NoColor())
+
 // Compute repeating UVs, possibly constrained by tile repeat limits
 static inline vec2 tileRepeatUV(vec2 uv, const vec2_scalar& tile_repeat) {
   if (tile_repeat.x > 0.0f) {
diff --git a/webrender/res/ps_text_run.glsl b/webrender/res/ps_text_run.glsl
@@ -326,4 +326,24 @@ void main() {
     #endif
 }
 
+#if defined(SWGL_DRAW_SPAN) && defined(SWGL_BLEND) && defined(SWGL_CLIP_DIST)
+void swgl_drawSpanRGBA8() {
+    // Only support simple swizzles for now. More complex swizzles must either
+    // be handled by blend overrides or the slow path.
+    if (v_mask_swizzle.x != 0.0 && v_mask_swizzle.x != 1.0) {
+        return;
+    }
+
+    #ifdef WR_FEATURE_DUAL_SOURCE_BLENDING
+        swgl_commitTextureLinearRGBA8(sColor0, v_uv, v_uv_bounds);
+    #else
+        if (swgl_isTextureR8(sColor0)) {
+            swgl_commitTextureLinearColorR8ToRGBA8(sColor0, v_uv, v_uv_bounds, v_color);
+        } else {
+            swgl_commitTextureLinearColorRGBA8(sColor0, v_uv, v_uv_bounds, v_color);
+        }
+    #endif
+}
+#endif
+
 #endif // WR_FRAGMENT_SHADER
diff --git a/wrench/reftests/text/reftest.list b/wrench/reftests/text/reftest.list
@@ -16,11 +16,11 @@ fuzzy(1,1) == shadow-huge.yaml shadow-huge-ref.yaml
 == decorations.yaml decorations-ref.yaml
 skip_on(android,device) fuzzy(1,3635) fuzzy-if(platform(swgl),3,13395) == decorations-suite.yaml decorations-suite.png  # Fails on Pixel2
 == 1658.yaml 1658-ref.yaml
-fuzzy(2,405) == split-batch.yaml split-batch-ref.yaml
+fuzzy(2,405) fuzzy-if(platform(swgl),2,1508) == split-batch.yaml split-batch-ref.yaml
 # Next 3 tests affected by bug 1548099 on Android
 skip_on(android) == shadow-red.yaml shadow-red-ref.yaml
-skip_on(android) fuzzy(1,999) fuzzy-if(platform(swgl),2,1081) == shadow-grey.yaml shadow-grey-ref.yaml
-skip_on(android) fuzzy(1,828) fuzzy-if(platform(swgl),1,1249) == shadow-grey-transparent.yaml shadow-grey-ref.yaml
+skip_on(android) fuzzy(1,999) fuzzy-if(platform(swgl),2,1324) == shadow-grey.yaml shadow-grey-ref.yaml
+skip_on(android) fuzzy(1,828) fuzzy-if(platform(swgl),2,1538) == shadow-grey-transparent.yaml shadow-grey-ref.yaml
 == subtle-shadow.yaml subtle-shadow-ref.yaml
 fuzzy(1,64) == shadow-atomic.yaml shadow-atomic-ref.yaml
 fuzzy(1,64) == shadow-clip-rect.yaml shadow-atomic-ref.yaml