From 5e4faa59716e408c65d064a65cc73b9f76e0b6df Mon Sep 17 00:00:00 2001 From: Kay Chang Date: Fri, 5 Apr 2024 17:01:49 -0400 Subject: [PATCH 1/5] First pass DoF optimisation. --- .../Runtime/Effects/DepthOfField.cs | 40 ++++- .../PostProcessing/Runtime/Utils/ShaderIDs.cs | 4 + .../Shaders/Builtins/DepthOfField.hlsl | 100 ++++++++++- .../Shaders/Builtins/DepthOfField.shader | 64 ++++++- .../Shaders/Builtins/DiskKernels.hlsl | 159 ++++++++++++++++++ 5 files changed, 357 insertions(+), 10 deletions(-) diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs index 396107ce3cd..7fe39b6ce96 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs @@ -89,11 +89,15 @@ enum Pass { CoCCalculation, CoCTemporalFilter, + downsampleInitialMaxCoC, + downsampleMaxCoC, + extendMaxCoC, DownsampleAndPrefilter, BokehSmallKernel, BokehMediumKernel, BokehLargeKernel, BokehVeryLargeKernel, + BokehUnified, PostFilter, Combine, DebugOverlay @@ -146,6 +150,12 @@ float CalculateMaxCoCRadius(int screenHeight) return Mathf.Min(0.05f, radiusInPixels / screenHeight); } + void CalculateCoCKernelLimits(int screenHeight, out Vector4 cocKernelLimitsA, out Vector4 cocKernelLimitsB) + { + cocKernelLimitsA = new Vector4(2-0.5f, 6- 0.5f, 10- 0.5f, 14- 0.5f) / screenHeight; + cocKernelLimitsB = new Vector4(18, 22, 26, 30) / screenHeight; + } + RenderTexture CheckHistory(int eye, int id, PostProcessRenderContext context, RenderTextureFormat format) { var rt = m_CoCHistoryTextures[eye][id]; @@ -166,6 +176,8 @@ RenderTexture CheckHistory(int eye, int id, PostProcessRenderContext context, Re public override void Render(PostProcessRenderContext context) { + bool useUnified = true; // (kc) + // The coc is stored in alpha so we need a 4 channels target. 
Note that using ARGB32 // will result in a very weak near-blur. var colorFormat = context.camera.allowHDR ? RenderTextureFormat.ARGBHalf : RenderTextureFormat.ARGB32; @@ -179,10 +191,18 @@ public override void Render(PostProcessRenderContext context) var coeff = f * f / (settings.aperture.value * (s1 - f) * scaledFilmHeight * 2f); var maxCoC = CalculateMaxCoCRadius(context.screenHeight); + Vector4 cocKernelLimitsA; + Vector4 cocKernelLimitsB; + CalculateCoCKernelLimits(context.screenHeight, out cocKernelLimitsA, out cocKernelLimitsB); + cocKernelLimitsA /= maxCoC; + cocKernelLimitsB /= maxCoC; + var sheet = context.propertySheets.Get(context.resources.shaders.depthOfField); sheet.properties.Clear(); sheet.properties.SetFloat(ShaderIDs.Distance, s1); sheet.properties.SetFloat(ShaderIDs.LensCoeff, coeff); + sheet.properties.SetVector(ShaderIDs.CoCKernelLimitsA, cocKernelLimitsA); + sheet.properties.SetVector(ShaderIDs.CoCKernelLimitsB, cocKernelLimitsB); sheet.properties.SetFloat(ShaderIDs.MaxCoC, maxCoC); sheet.properties.SetFloat(ShaderIDs.RcpMaxCoC, 1f / maxCoC); sheet.properties.SetFloat(ShaderIDs.RcpAspect, 1f / aspect); @@ -213,13 +233,31 @@ public override void Render(PostProcessRenderContext context) cmd.SetGlobalTexture(ShaderIDs.CoCTex, historyWrite); } + int macCoCIndex = 4; + + // Downsampling CoC + context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[1], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> 1, context.height >> 1); + cmd.BlitFullscreenTriangle(ShaderIDs.CoCTex, ShaderIDs.MaxCoCMips[1], sheet, (int)Pass.downsampleInitialMaxCoC); + for (int i = 2; i <= macCoCIndex; ++i) + { + context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[i], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> i, context.height >> i); + cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[i-1], ShaderIDs.MaxCoCMips[i], sheet, (int)Pass.downsampleMaxCoC); + } + + if (useUnified) + { + // Extend CoC + 
context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCTex, 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> macCoCIndex, context.height >> macCoCIndex); + cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[macCoCIndex], ShaderIDs.MaxCoCTex, sheet, (int)Pass.extendMaxCoC); + } + // Downsampling and prefiltering pass context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.DepthOfFieldTex, 0, colorFormat, RenderTextureReadWrite.Default, FilterMode.Bilinear, context.width / 2, context.height / 2); cmd.BlitFullscreenTriangle(context.source, ShaderIDs.DepthOfFieldTex, sheet, (int)Pass.DownsampleAndPrefilter); // Bokeh simulation pass context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.DepthOfFieldTemp, 0, colorFormat, RenderTextureReadWrite.Default, FilterMode.Bilinear, context.width / 2, context.height / 2); - cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, (int)Pass.BokehSmallKernel + (int)settings.kernelSize.value); + cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, useUnified ? 
(int)Pass.BokehUnified : (int)Pass.BokehSmallKernel + (int)settings.kernelSize.value); // Postfilter pass cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTemp, ShaderIDs.DepthOfFieldTex, sheet, (int)Pass.PostFilter); diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs index f555f7b8560..22551387300 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs @@ -67,8 +67,12 @@ static class ShaderIDs internal static readonly int DepthOfFieldTemp = Shader.PropertyToID("_DepthOfFieldTemp"); internal static readonly int DepthOfFieldTex = Shader.PropertyToID("_DepthOfFieldTex"); + internal static readonly int[] MaxCoCMips = new int[] { Shader.PropertyToID("_CoCMip0"), Shader.PropertyToID("_CoCMip1"), Shader.PropertyToID("_CoCMip2"), Shader.PropertyToID("_CoCMip3"), Shader.PropertyToID("_CoCMip4"), Shader.PropertyToID("_CoCMip5") }; + internal static readonly int MaxCoCTex = Shader.PropertyToID("_MaxCoCTex"); internal static readonly int Distance = Shader.PropertyToID("_Distance"); internal static readonly int LensCoeff = Shader.PropertyToID("_LensCoeff"); + internal static readonly int CoCKernelLimitsA = Shader.PropertyToID("_CoCKernelLimitsA"); + internal static readonly int CoCKernelLimitsB = Shader.PropertyToID("_CoCKernelLimitsB"); internal static readonly int MaxCoC = Shader.PropertyToID("_MaxCoC"); internal static readonly int RcpMaxCoC = Shader.PropertyToID("_RcpMaxCoC"); internal static readonly int RcpAspect = Shader.PropertyToID("_RcpAspect"); diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl index 693304ae0f0..c23669d0472 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl +++ 
b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl @@ -12,6 +12,7 @@ TEXTURE2D_SAMPLER2D(_CameraDepthTexture, sampler_CameraDepthTexture); TEXTURE2D_SAMPLER2D(_CameraMotionVectorsTexture, sampler_CameraMotionVectorsTexture); TEXTURE2D_SAMPLER2D(_CoCTex, sampler_CoCTex); +TEXTURE2D_SAMPLER2D(_MaxCoCTex, sampler_MaxCoCTex); TEXTURE2D_SAMPLER2D(_DepthOfFieldTex, sampler_DepthOfFieldTex); float4 _DepthOfFieldTex_TexelSize; @@ -19,6 +20,8 @@ float4 _DepthOfFieldTex_TexelSize; // Camera parameters float _Distance; float _LensCoeff; // f^2 / (N * (S1 - f) * film_width * 2) +half4 _CoCKernelLimitsA; +half4 _CoCKernelLimitsB; float _MaxCoC; float _RcpMaxCoC; float _RcpAspect; @@ -147,18 +150,95 @@ half4 FragPrefilter(VaryingsDefault i) : SV_Target return half4(avg, coc); } +half4 FragDownsampleCoC(VaryingsDefault i) : SV_Target +{ + // TODO gather version + + float3 duv = _MainTex_TexelSize.xyx * float3(0.5, 0.5, -0.5); + float2 uv0 = UnityStereoTransformScreenSpaceTex(i.texcoord - duv.xy); + float2 uv1 = UnityStereoTransformScreenSpaceTex(i.texcoord - duv.zy); + float2 uv2 = UnityStereoTransformScreenSpaceTex(i.texcoord + duv.zy); + float2 uv3 = UnityStereoTransformScreenSpaceTex(i.texcoord + duv.xy); + + // Sample CoCs + half4 cocs; + cocs.x = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv0).r; + cocs.y = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv1).r; + cocs.z = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv2).r; + cocs.w = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv3).r; + +#if defined(INITIAL_COC) + cocs = cocs * 2.0 - 1.0; +#endif + cocs = abs(cocs); + + half maxCoC = max(cocs.x, Max3(cocs.y, cocs.z, cocs.w)); + return half4(maxCoC, 0.0, 0.0, 0.0); +} + +half4 FragExtendCoC(VaryingsDefault i) : SV_Target +{ + float tx = _MainTex_TexelSize.x; + float ty = _MainTex_TexelSize.y; + + float2 uv0 = UnityStereoTransformScreenSpaceTex(i.texcoord); + float2 uv1 = UnityStereoTransformScreenSpaceTex(i.texcoord + float2( tx, 0)); + float2 uv2 = 
UnityStereoTransformScreenSpaceTex(i.texcoord + float2( tx, ty)); + float2 uv3 = UnityStereoTransformScreenSpaceTex(i.texcoord + float2( 0, ty)); + float2 uv4 = UnityStereoTransformScreenSpaceTex(i.texcoord + float2(-tx, ty)); + float2 uv5 = UnityStereoTransformScreenSpaceTex(i.texcoord + float2(-tx, 0)); + float2 uv6 = UnityStereoTransformScreenSpaceTex(i.texcoord + float2(-tx,-ty)); + float2 uv7 = UnityStereoTransformScreenSpaceTex(i.texcoord + float2( 0,-ty)); + float2 uv8 = UnityStereoTransformScreenSpaceTex(i.texcoord + float2( tx,-ty)); + + half coc0 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv0).r; + half coc1 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv1).r; + half coc2 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv2).r; + half coc3 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv3).r; + half coc4 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv4).r; + half coc5 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv5).r; + half coc6 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv6).r; + half coc7 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv7).r; + half coc8 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv8).r; + + half maxCoC = Max3(Max3(coc0, coc1, coc2), Max3(coc3, coc4, coc5), Max3(coc6, coc7, coc8)); + return half4(maxCoC, 0.0, 0.0, 0.0); +} + + // Bokeh filter with disk-shaped kernels half4 FragBlur(VaryingsDefault i) : SV_Target { half4 samp0 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.texcoordStereo); + // normalized value in range [0, 1] + half maxCoC = SAMPLE_TEXTURE2D(_MaxCoCTex, sampler_MaxCoCTex, i.texcoordStereo).r; + + int sampleCount; + +#if defined(KERNEL_UNIFIED) + UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[0]) + sampleCount = kDiskKernelSizes[0]; + else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[1]) + sampleCount = kDiskKernelSizes[1]; + else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[2]) + sampleCount = kDiskKernelSizes[2]; + else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[3]) + sampleCount = kDiskKernelSizes[3]; + else + 
sampleCount = kDiskKernelSizes[4]; + //(kc) sampleCount = kDiskKernelSizes[4]; +#else + sampleCount = kSampleCount; +#endif half4 bgAcc = 0.0; // Background: far field bokeh half4 fgAcc = 0.0; // Foreground: near field bokeh UNITY_LOOP - for (int si = 0; si < kSampleCount; si++) + for (int si = 0; si < sampleCount; si++) { - float2 disp = kDiskKernel[si] * _MaxCoC; + //float2 disp = kDiskKernel[si] * _MaxCoC; + float2 disp = kDiskAllKernels[si] * _MaxCoC * (12.0/8.0); float dist = length(disp); float2 duv = float2(disp.x * _RcpAspect, disp.y); @@ -198,6 +278,22 @@ half4 FragBlur(VaryingsDefault i) : SV_Target half alpha = saturate(fgAcc.a); half3 rgb = lerp(bgAcc.rgb, fgAcc.rgb, alpha); +#if defined(KERNEL_UNIFIED) + if (i.texcoord.x < 0.1) + rgb.r += 0.5; // (kc) + + /* + if (sampleCount == 8) + rgb.r += 0.5; + if (sampleCount == 22) + rgb.g += 0.5; + if (sampleCount == 43) + rgb.b += 0.5; + if (sampleCount == 1) + rgb.rg += 0.5; + */ +#endif + return half4(rgb, alpha); } diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader index 69c363f6540..9d852ee5be1 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader @@ -32,6 +32,43 @@ Shader "Hidden/PostProcessing/DepthOfField" } Pass // 2 + { + Name "Downsample initial MaxCoC" + + HLSLPROGRAM + #pragma target 3.5 + #pragma vertex VertDefault + #pragma fragment FragDownsampleCoC + #define INITIAL_COC + #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" + ENDHLSL + } + + Pass // 3 + { + Name "Downsample MaxCoC" + + HLSLPROGRAM + #pragma target 3.5 + #pragma vertex VertDefault + #pragma fragment FragDownsampleCoC + #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" + ENDHLSL + } + + Pass // 4 + { + Name 
"Extend MaxCoC" + + HLSLPROGRAM + #pragma target 3.5 + #pragma vertex VertDefault + #pragma fragment FragExtendCoC + #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" + ENDHLSL + } + + Pass // 5 { Name "Downsample and Prefilter" @@ -43,7 +80,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 3 + Pass // 6 { Name "Bokeh Filter (small)" @@ -56,7 +93,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 4 + Pass // 7 { Name "Bokeh Filter (medium)" @@ -69,7 +106,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 5 + Pass // 8 { Name "Bokeh Filter (large)" @@ -82,7 +119,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 6 + Pass // 9 { Name "Bokeh Filter (very large)" @@ -95,7 +132,20 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 7 + Pass // 10 + { + Name "Bokeh Filter (unified)" + + HLSLPROGRAM + #pragma target 3.5 + #pragma vertex VertDefault + #pragma fragment FragBlur + #define KERNEL_UNIFIED + #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" + ENDHLSL + } + + Pass // 11 { Name "Postfilter" @@ -107,7 +157,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 8 + Pass // 12 { Name "Combine" @@ -119,7 +169,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 9 + Pass // 13 { Name "Debug Overlay" diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl index b817ce852ed..31f68f90387 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl @@ -201,4 +201,163 @@ static const float2 kDiskKernel[kSampleCount] = { #endif + +static const int kDiskKernelSizes[7] = { 1, 8, 22, 43, 71, 106, 148 }; +static const float2 kDiskAllKernels[148] = { +float2(0, 0), 
+// ring 1 index=1 +float2(0.186046511627907, 0), +float2(0.115998102671392, 0.145457019994052), +float2(-0.0413992435267562, 0.181381937150107), +float2(-0.16762211495859, 0.0807225561148946), +float2(-0.16762211495859, -0.0807225561148945), +float2(-0.0413992435267562, -0.181381937150107), +float2(0.115998102671392, -0.145457019994052), +// ring 2 index=8 +float2(0.348837209302326, 0), +float2(0.314291465547356, 0.151354792715427), +float2(0.217496442508861, 0.272731912488848), +float2(0.0776235816126678, 0.34009113215645), +float2(-0.0776235816126678, 0.34009113215645), +float2(-0.217496442508861, 0.272731912488848), +float2(-0.314291465547355, 0.151354792715427), +float2(-0.348837209302326, 4.27202371795588E-17), +float2(-0.314291465547356, -0.151354792715427), +float2(-0.217496442508861, -0.272731912488848), +float2(-0.0776235816126679, -0.34009113215645), +float2(0.0776235816126674, -0.34009113215645), +float2(0.21749644250886, -0.272731912488848), +float2(0.314291465547356, -0.151354792715427), +// ring 3 index=22 +float2(0.511627906976744, 0), +float2(0.488897714588258, 0.150804972954416), +float2(0.422726814766323, 0.288210262265109), +float2(0.318994782346329, 0.400006804983643), +float2(0.186918663629318, 0.47626098767843), +float2(0.0382340013697985, 0.510197291581069), +float2(-0.113847919698579, 0.498800327162793), +float2(-0.255813953488372, 0.443082764726922), +float2(-0.375049794889679, 0.347995354208377), +float2(-0.460960816136121, 0.22198702931596), +float2(-0.505913445975647, 0.0762541826947871), +float2(-0.505913445975647, -0.0762541826947867), +float2(-0.460960816136121, -0.22198702931596), +float2(-0.375049794889679, -0.347995354208377), +float2(-0.255813953488372, -0.443082764726922), +float2(-0.11384791969858, -0.498800327162793), +float2(0.0382340013697985, -0.510197291581069), +float2(0.186918663629319, -0.47626098767843), +float2(0.318994782346329, -0.400006804983643), +float2(0.422726814766323, -0.288210262265109), 
+float2(0.488897714588258, -0.150804972954416), +// ring 4 index=43 +float2(0.674418604651163, 0), +float2(0.657509522169137, 0.150072257784491), +float2(0.607630166724887, 0.292619265916493), +float2(0.527281697478439, 0.420493122183797), +float2(0.420493122183797, 0.527281697478439), +float2(0.292619265916493, 0.607630166724887), +float2(0.150072257784491, 0.657509522169137), +float2(4.12962292735735E-17, 0.674418604651163), +float2(-0.150072257784491, 0.657509522169137), +float2(-0.292619265916493, 0.607630166724887), +float2(-0.420493122183797, 0.527281697478439), +float2(-0.527281697478438, 0.420493122183797), +float2(-0.607630166724887, 0.292619265916493), +float2(-0.657509522169137, 0.150072257784491), +float2(-0.674418604651163, 8.25924585471471E-17), +float2(-0.657509522169137, -0.150072257784491), +float2(-0.607630166724887, -0.292619265916493), +float2(-0.527281697478439, -0.420493122183797), +float2(-0.420493122183797, -0.527281697478439), +float2(-0.292619265916493, -0.607630166724887), +float2(-0.150072257784491, -0.657509522169137), +float2(-1.23888687820721E-16, -0.674418604651163), +float2(0.15007225778449, -0.657509522169137), +float2(0.292619265916493, -0.607630166724887), +float2(0.420493122183797, -0.527281697478439), +float2(0.527281697478439, -0.420493122183797), +float2(0.607630166724887, -0.292619265916492), +float2(0.657509522169137, -0.150072257784491), +// ring 5 index=71 +float2(0.837209302325581, 0), +float2(0.823755004408155, 0.149489493319789), +float2(0.783824542861175, 0.294174271323915), +float2(0.718701315573655, 0.429404046200294), +float2(0.630478436654186, 0.550832421716969), +float2(0.521991462021265, 0.654556589973234), +float2(0.396727252302976, 0.737242770856804), +float2(0.258711902267398, 0.796233362479663), +float2(0.112381338824084, 0.829632358689434), +float2(-0.0375612533167101, 0.836366288267147), +float2(-0.186296595870403, 0.81621871717548), +float2(-0.329044212547471, 0.769837204926796), 
+float2(-0.461216077494783, 0.698712491487602), +float2(-0.578564078221561, 0.605130583669444), +float2(-0.677316553430188, 0.492099280989047), +float2(-0.754299517313653, 0.363251502517026), +float2(-0.807038674070947, 0.222728521869775), +float2(-0.833838943809968, 0.0750468632679909), +float2(-0.833838943809968, -0.0750468632679907), +float2(-0.807038674070947, -0.222728521869774), +float2(-0.754299517313653, -0.363251502517025), +float2(-0.677316553430189, -0.492099280989047), +float2(-0.578564078221562, -0.605130583669444), +float2(-0.461216077494784, -0.698712491487602), +float2(-0.329044212547471, -0.769837204926796), +float2(-0.186296595870403, -0.81621871717548), +float2(-0.0375612533167103, -0.836366288267147), +float2(0.112381338824084, -0.829632358689434), +float2(0.258711902267398, -0.796233362479664), +float2(0.396727252302976, -0.737242770856804), +float2(0.521991462021265, -0.654556589973234), +float2(0.630478436654186, -0.550832421716969), +float2(0.718701315573655, -0.429404046200294), +float2(0.783824542861175, -0.294174271323915), +float2(0.823755004408155, -0.149489493319789), +// ring 6 index=106 +float2(1, 0), +float2(0.988830826225129, 0.149042266176174), +float2(0.955572805786141, 0.294755174410904), +float2(0.900968867902419, 0.433883739117558), +float2(0.826238774315995, 0.563320058063622), +float2(0.733051871829826, 0.680172737770919), +float2(0.623489801858734, 0.78183148246803), +float2(0.5, 0.866025403784439), +float2(0.365341024366395, 0.930873748644204), +float2(0.222520933956314, 0.974927912181824), +float2(0.0747300935864244, 0.99720379718118), +float2(-0.074730093586424, 0.99720379718118), +float2(-0.222520933956314, 0.974927912181824), +float2(-0.365341024366395, 0.930873748644204), +float2(-0.5, 0.866025403784439), +float2(-0.623489801858733, 0.78183148246803), +float2(-0.733051871829826, 0.680172737770919), +float2(-0.826238774315995, 0.563320058063622), +float2(-0.900968867902419, 0.433883739117558), 
+float2(-0.955572805786141, 0.294755174410905), +float2(-0.988830826225129, 0.149042266176175), +float2(-1, 1.22464679914735E-16), +float2(-0.988830826225129, -0.149042266176174), +float2(-0.955572805786141, -0.294755174410904), +float2(-0.900968867902419, -0.433883739117558), +float2(-0.826238774315995, -0.563320058063622), +float2(-0.733051871829826, -0.680172737770919), +float2(-0.623489801858734, -0.78183148246803), +float2(-0.5, -0.866025403784438), +float2(-0.365341024366395, -0.930873748644204), +float2(-0.222520933956315, -0.974927912181824), +float2(-0.0747300935864247, -0.99720379718118), +float2(0.0747300935864244, -0.99720379718118), +float2(0.222520933956314, -0.974927912181824), +float2(0.365341024366395, -0.930873748644204), +float2(0.499999999999999, -0.866025403784439), +float2(0.623489801858733, -0.78183148246803), +float2(0.733051871829827, -0.680172737770919), +float2(0.826238774315994, -0.563320058063623), +float2(0.900968867902419, -0.433883739117558), +float2(0.955572805786141, -0.294755174410905), +float2(0.988830826225128, -0.149042266176175), +}; + #endif // UNITY_POSTFX_DISK_KERNELS From 8a0758fc4074e584591f293e435a0f0e80a06367 Mon Sep 17 00:00:00 2001 From: Kay Chang Date: Sun, 7 Apr 2024 20:52:44 -0400 Subject: [PATCH 2/5] Update to get same result with unified and original brute-force approach. 
--- .../Runtime/Effects/DepthOfField.cs | 38 +++---- .../PostProcessing/Runtime/Utils/ShaderIDs.cs | 6 +- .../Shaders/Builtins/DepthOfField.hlsl | 98 +++++++++++++++---- .../Shaders/Builtins/DepthOfField.shader | 2 +- .../Shaders/Builtins/DiskKernels.hlsl | 2 +- 5 files changed, 105 insertions(+), 41 deletions(-) diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs index 7fe39b6ce96..8a52d436ac1 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs @@ -139,12 +139,14 @@ RenderTextureFormat SelectFormat(RenderTextureFormat primary, RenderTextureForma return RenderTextureFormat.Default; } - float CalculateMaxCoCRadius(int screenHeight) + float CalculateMaxCoCRadius(int screenHeight, out int mipLevel) { // Estimate the allowable maximum radius of CoC from the kernel // size (the equation below was empirically derived). float radiusInPixels = (float)settings.kernelSize.value * 4f + 6f; - + // Find the miplevel encasing the bokeh radius. + mipLevel = (int)(Mathf.Log(radiusInPixels * 2 - 1) / Mathf.Log(2)); + // Applying a 5% limit to the CoC radius to keep the size of // TileMax/NeighborMax small enough. return Mathf.Min(0.05f, radiusInPixels / screenHeight); @@ -176,7 +178,7 @@ RenderTexture CheckHistory(int eye, int id, PostProcessRenderContext context, Re public override void Render(PostProcessRenderContext context) { - bool useUnified = true; // (kc) + bool useUnified = (Time.time % 2f) < 1f; // (kc) // The coc is stored in alpha so we need a 4 channels target. Note that using ARGB32 // will result in a very weak near-blur. 
@@ -189,7 +191,8 @@ public override void Render(PostProcessRenderContext context) var s1 = Mathf.Max(settings.focusDistance.value, f); var aspect = (float)context.screenWidth / (float)context.screenHeight; var coeff = f * f / (settings.aperture.value * (s1 - f) * scaledFilmHeight * 2f); - var maxCoC = CalculateMaxCoCRadius(context.screenHeight); + int maxCoCMipLevel; + var maxCoC = CalculateMaxCoCRadius(context.screenHeight, out maxCoCMipLevel); Vector4 cocKernelLimitsA; Vector4 cocKernelLimitsB; @@ -233,22 +236,21 @@ public override void Render(PostProcessRenderContext context) cmd.SetGlobalTexture(ShaderIDs.CoCTex, historyWrite); } - int macCoCIndex = 4; - - // Downsampling CoC - context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[1], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> 1, context.height >> 1); - cmd.BlitFullscreenTriangle(ShaderIDs.CoCTex, ShaderIDs.MaxCoCMips[1], sheet, (int)Pass.downsampleInitialMaxCoC); - for (int i = 2; i <= macCoCIndex; ++i) - { - context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[i], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> i, context.height >> i); - cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[i-1], ShaderIDs.MaxCoCMips[i], sheet, (int)Pass.downsampleMaxCoC); - } - if (useUnified) { + // Downsampling CoC + context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[1], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> 1, context.height >> 1); + cmd.BlitFullscreenTriangle(ShaderIDs.CoCTex, ShaderIDs.MaxCoCMips[1], sheet, (int)Pass.downsampleInitialMaxCoC); + + for (int i = 2; i <= maxCoCMipLevel; ++i) + { + context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[i], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> i, context.height >> i); + cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[i - 1], ShaderIDs.MaxCoCMips[i], sheet, (int)Pass.downsampleMaxCoC); + } + // Extend CoC 
- context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCTex, 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> macCoCIndex, context.height >> macCoCIndex); - cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[macCoCIndex], ShaderIDs.MaxCoCTex, sheet, (int)Pass.extendMaxCoC); + context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCTex, 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> maxCoCMipLevel, context.height >> maxCoCMipLevel); + cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[maxCoCMipLevel], ShaderIDs.MaxCoCTex, sheet, (int)Pass.extendMaxCoC); } // Downsampling and prefiltering pass @@ -257,7 +259,7 @@ public override void Render(PostProcessRenderContext context) // Bokeh simulation pass context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.DepthOfFieldTemp, 0, colorFormat, RenderTextureReadWrite.Default, FilterMode.Bilinear, context.width / 2, context.height / 2); - cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, useUnified ? (int)Pass.BokehUnified : (int)Pass.BokehSmallKernel + (int)settings.kernelSize.value); + cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, useUnified ? 
(int)Pass.BokehUnified : (int)Pass.BokehSmallKernel + (int)settings.kernelSize.value); // Postfilter pass cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTemp, ShaderIDs.DepthOfFieldTex, sheet, (int)Pass.PostFilter); diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs index 22551387300..802fe4ed8e0 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs @@ -67,7 +67,11 @@ static class ShaderIDs internal static readonly int DepthOfFieldTemp = Shader.PropertyToID("_DepthOfFieldTemp"); internal static readonly int DepthOfFieldTex = Shader.PropertyToID("_DepthOfFieldTex"); - internal static readonly int[] MaxCoCMips = new int[] { Shader.PropertyToID("_CoCMip0"), Shader.PropertyToID("_CoCMip1"), Shader.PropertyToID("_CoCMip2"), Shader.PropertyToID("_CoCMip3"), Shader.PropertyToID("_CoCMip4"), Shader.PropertyToID("_CoCMip5") }; + internal static readonly int[] MaxCoCMips = new int[] { + Shader.PropertyToID("_CoCMip0"), Shader.PropertyToID("_CoCMip1"), Shader.PropertyToID("_CoCMip2"), Shader.PropertyToID("_CoCMip3"), + Shader.PropertyToID("_CoCMip4"), Shader.PropertyToID("_CoCMip5"), Shader.PropertyToID("_CoCMip6"), Shader.PropertyToID("_CoCMip7"), + Shader.PropertyToID("_CoCMip8"), Shader.PropertyToID("_CoCMip9"), Shader.PropertyToID("_CoCMip10"), Shader.PropertyToID("_CoCMip11") + }; internal static readonly int MaxCoCTex = Shader.PropertyToID("_MaxCoCTex"); internal static readonly int Distance = Shader.PropertyToID("_Distance"); internal static readonly int LensCoeff = Shader.PropertyToID("_LensCoeff"); diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl index c23669d0472..68b279533d5 100644 --- 
a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl @@ -213,23 +213,84 @@ half4 FragBlur(VaryingsDefault i) : SV_Target // normalized value in range [0, 1] half maxCoC = SAMPLE_TEXTURE2D(_MaxCoCTex, sampler_MaxCoCTex, i.texcoordStereo).r; - int sampleCount; + int sampleCount = kSampleCount; + + half4 bgAcc = 0.0; // Background: far field bokeh + half4 fgAcc = 0.0; // Foreground: near field bokeh + + const half margin = _MainTex_TexelSize.y * 2; + UNITY_LOOP + for (int si = 0; si < sampleCount; si++) + { #if defined(KERNEL_UNIFIED) + float2 disp = kDiskAllKernels[si] * _MaxCoC * (12.0 / 8.0); +#else + float2 disp = kDiskKernel[si] * _MaxCoC; +#endif + float dist = length(disp); + + float2 duv = float2(disp.x * _RcpAspect, disp.y); + half4 samp = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, UnityStereoTransformScreenSpaceTex(i.texcoord + duv)); + + // BG: Compare CoC of the current sample and the center sample + // and select smaller one. + half bgCoC = max(min(samp0.a, samp.a), 0.0); + + // Compare the CoC to the sample distance. + // Add a small margin to smooth out. + half bgWeight = saturate((bgCoC - dist + margin) / margin); + half fgWeight = saturate((-samp.a - dist + margin) / margin); + + // Cut influence from focused areas because they're darkened by CoC + // premultiplying. This is only needed for near field. + fgWeight *= step(_MainTex_TexelSize.y, -samp.a); + + // Accumulation + bgAcc += half4(samp.rgb, 1.0) * bgWeight; + fgAcc += half4(samp.rgb, 1.0) * fgWeight; + } + + // Get the weighted average. + bgAcc.rgb /= bgAcc.a + (bgAcc.a == 0.0); // zero-div guard + fgAcc.rgb /= fgAcc.a + (fgAcc.a == 0.0); + + // BG: Calculate the alpha value only based on the center CoC. + // This is a rather aggressive approximation but provides stable results. 
+ bgAcc.a = smoothstep(_MainTex_TexelSize.y, _MainTex_TexelSize.y * 2.0, samp0.a); + + // FG: Normalize the total of the weights. + fgAcc.a *= PI / sampleCount; + + // Alpha premultiplying + half alpha = saturate(fgAcc.a); + half3 rgb = lerp(bgAcc.rgb, fgAcc.rgb, alpha); + + return half4(rgb, alpha); +} + +// Bokeh filter with disk-shaped kernels +half4 FragBlurUnified(VaryingsDefault i) : SV_Target +{ + half4 samp0 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.texcoordStereo); + // normalized value in range [0, 1] + half maxCoC = SAMPLE_TEXTURE2D(_MaxCoCTex, sampler_MaxCoCTex, i.texcoordStereo).r; + + int sampleCount; + UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[0]) - sampleCount = kDiskKernelSizes[0]; + sampleCount = kDiskAllKernelSizes[0]; + // margin adjustment later in the shader code artificially expands bokeh by 4px in fullscreen units (1 extra ring), so we cannot have small bokeh as a result! else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[1]) - sampleCount = kDiskKernelSizes[1]; + sampleCount = kDiskAllKernelSizes[1+1]; else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[2]) - sampleCount = kDiskKernelSizes[2]; + sampleCount = kDiskAllKernelSizes[2+1]; else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[3]) - sampleCount = kDiskKernelSizes[3]; + sampleCount = kDiskAllKernelSizes[3+1]; else - sampleCount = kDiskKernelSizes[4]; - //(kc) sampleCount = kDiskKernelSizes[4]; -#else - sampleCount = kSampleCount; -#endif + sampleCount = kDiskAllKernelSizes[4]; + + const half margin = _MainTex_TexelSize.y * 2; half4 bgAcc = 0.0; // Background: far field bokeh half4 fgAcc = 0.0; // Foreground: near field bokeh @@ -237,8 +298,8 @@ half4 FragBlur(VaryingsDefault i) : SV_Target UNITY_LOOP for (int si = 0; si < sampleCount; si++) { - //float2 disp = kDiskKernel[si] * _MaxCoC; - float2 disp = kDiskAllKernels[si] * _MaxCoC * (12.0/8.0); + float2 disp = kDiskAllKernels[si] * _MaxCoC * (12.0 / 8.0); + float dist = length(disp); float2 duv = float2(disp.x * _RcpAspect, disp.y); @@
-250,8 +311,7 @@ half4 FragBlur(VaryingsDefault i) : SV_Target // Compare the CoC to the sample distance. // Add a small margin to smooth out. - const half margin = _MainTex_TexelSize.y * 2; - half bgWeight = saturate((bgCoC - dist + margin) / margin); + half bgWeight = saturate((bgCoC - dist + margin) / margin); half fgWeight = saturate((-samp.a - dist + margin) / margin); // Cut influence from focused areas because they're darkened by CoC @@ -272,27 +332,25 @@ half4 FragBlur(VaryingsDefault i) : SV_Target bgAcc.a = smoothstep(_MainTex_TexelSize.y, _MainTex_TexelSize.y * 2.0, samp0.a); // FG: Normalize the total of the weights. - fgAcc.a *= PI / kSampleCount; + fgAcc.a *= PI / sampleCount; // Alpha premultiplying half alpha = saturate(fgAcc.a); half3 rgb = lerp(bgAcc.rgb, fgAcc.rgb, alpha); -#if defined(KERNEL_UNIFIED) - if (i.texcoord.x < 0.1) + /* + if (i.texcoord.x < 0.05) rgb.r += 0.5; // (kc) - /* if (sampleCount == 8) rgb.r += 0.5; if (sampleCount == 22) rgb.g += 0.5; if (sampleCount == 43) rgb.b += 0.5; - if (sampleCount == 1) + if (sampleCount == 71) rgb.rg += 0.5; */ -#endif return half4(rgb, alpha); } diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader index 9d852ee5be1..fc7c55822bb 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader @@ -139,7 +139,7 @@ Shader "Hidden/PostProcessing/DepthOfField" HLSLPROGRAM #pragma target 3.5 #pragma vertex VertDefault - #pragma fragment FragBlur + #pragma fragment FragBlurUnified #define KERNEL_UNIFIED #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl index 
31f68f90387..37f6a96693d 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl @@ -202,7 +202,7 @@ static const float2 kDiskKernel[kSampleCount] = { #endif -static const int kDiskKernelSizes[7] = { 1, 8, 22, 43, 71, 106, 148 }; +static const int kDiskAllKernelSizes[7] = { 1, 8, 22, 43, 71, 106, 148 }; static const float2 kDiskAllKernels[148] = { float2(0, 0), // ring 1 index=1 From 418ad5391637ef258b9f03d0d85a8a1fd3272ccd Mon Sep 17 00:00:00 2001 From: Kay Chang Date: Thu, 11 Apr 2024 00:40:43 -0400 Subject: [PATCH 3/5] Added UNITY_NEAR_CLIP_VALUE. --- com.unity.postprocessing/PostProcessing/Shaders/API/D3D11.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/D3D12.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/D3D9.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/Metal.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/OpenGL.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/PSP2.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/PSSL.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/Switch.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/Vulkan.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/WebGPU.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/XboxOne.hlsl | 1 + 11 files changed, 11 insertions(+) diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/D3D11.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/D3D11.hlsl index 1982bcb3616..bcf2eb91f4c 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/D3D11.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/D3D11.hlsl @@ -1,5 +1,6 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 +#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define 
UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/D3D12.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/D3D12.hlsl index 1982bcb3616..bcf2eb91f4c 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/D3D12.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/D3D12.hlsl @@ -1,5 +1,6 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 +#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/D3D9.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/D3D9.hlsl index 8427b9c8ae3..0bb74573a4a 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/D3D9.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/D3D9.hlsl @@ -1,6 +1,7 @@ // ALso used for Direct3D 11 "feature level 9.x" target for Windows Store and Windows Phone #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 0 +#define UNITY_NEAR_CLIP_VALUE (0.0) #define UNITY_GATHER_SUPPORTED 0 #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/Metal.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/Metal.hlsl index 50916982d13..5ac8a00ed59 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/Metal.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/Metal.hlsl @@ -1,5 +1,6 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 +#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED 0 // Currently broken on Metal for some reason (May 2017) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/OpenGL.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/OpenGL.hlsl index 9c7cd11f11d..0eb657ff46e 100644 --- 
a/com.unity.postprocessing/PostProcessing/Shaders/API/OpenGL.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/OpenGL.hlsl @@ -1,6 +1,7 @@ // For now OpenGL is considered at GLES2 level #define UNITY_UV_STARTS_AT_TOP 0 #define UNITY_REVERSED_Z 0 +#define UNITY_NEAR_CLIP_VALUE (-1.0) #define UNITY_GATHER_SUPPORTED 0 #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/PSP2.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/PSP2.hlsl index dcb0c7e37c7..25b3356f58f 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/PSP2.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/PSP2.hlsl @@ -1,6 +1,7 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 0 +#define UNITY_NEAR_CLIP_VALUE (0.0) #define UNITY_GATHER_SUPPORTED 0 #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 0 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/PSSL.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/PSSL.hlsl index 1982bcb3616..bcf2eb91f4c 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/PSSL.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/PSSL.hlsl @@ -1,5 +1,6 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 +#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/Switch.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/Switch.hlsl index bf2d2333360..046df14d7a6 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/Switch.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/Switch.hlsl @@ -1,5 +1,6 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 +#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define TEXTURE2D_SAMPLER2D(textureName, samplerName) Texture2D textureName; 
SamplerState samplerName diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/Vulkan.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/Vulkan.hlsl index 1982bcb3616..bcf2eb91f4c 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/Vulkan.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/Vulkan.hlsl @@ -1,5 +1,6 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 +#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/WebGPU.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/WebGPU.hlsl index 1982bcb3616..bcf2eb91f4c 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/WebGPU.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/WebGPU.hlsl @@ -1,5 +1,6 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 +#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/XboxOne.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/XboxOne.hlsl index 1982bcb3616..bcf2eb91f4c 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/XboxOne.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/XboxOne.hlsl @@ -1,5 +1,6 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 +#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 From 69f1e7c90af3ce34b41010085802de1d51d1271d Mon Sep 17 00:00:00 2001 From: Kay Chang Date: Thu, 11 Apr 2024 22:05:44 -0400 Subject: [PATCH 4/5] Added static tile version. 
--- .../Runtime/Effects/DepthOfField.cs | 53 ++++++- .../Runtime/Utils/RuntimeUtilities.cs | 28 ++++ .../PostProcessing/Runtime/Utils/ShaderIDs.cs | 7 + .../Shaders/Builtins/DepthOfField.hlsl | 136 +++++++++++++++--- .../Shaders/Builtins/DepthOfField.shader | 66 ++++++++- 5 files changed, 254 insertions(+), 36 deletions(-) diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs index 8a52d436ac1..32a118e0438 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs @@ -98,6 +98,10 @@ enum Pass BokehLargeKernel, BokehVeryLargeKernel, BokehUnified, + BokehKernel1, + BokehKernel2, + BokehKernel3, + BokehKernel4, PostFilter, Combine, DebugOverlay @@ -178,7 +182,8 @@ RenderTexture CheckHistory(int eye, int id, PostProcessRenderContext context, Re public override void Render(PostProcessRenderContext context) { - bool useUnified = (Time.time % 2f) < 1f; // (kc) + bool useUnified = true;// (Time.time % 2f) < 1f; // (kc) + bool useStaticTiles = false; // The coc is stored in alpha so we need a 4 channels target. Note that using ARGB32 // will result in a very weak near-blur. @@ -194,6 +199,11 @@ public override void Render(PostProcessRenderContext context) int maxCoCMipLevel; var maxCoC = CalculateMaxCoCRadius(context.screenHeight, out maxCoCMipLevel); + // pad full-resolution screen so that the number of mips required by maxCoCMipLevel does not cause the downsampling chain to skip rows or columns of pixels.
+ int tileSize = 1 << maxCoCMipLevel; + int paddedWidth = ((context.width + tileSize - 1) >> maxCoCMipLevel) << maxCoCMipLevel; + int paddedHeight = ((context.height + tileSize - 1) >> maxCoCMipLevel) << maxCoCMipLevel; + Vector4 cocKernelLimitsA; Vector4 cocKernelLimitsB; CalculateCoCKernelLimits(context.screenHeight, out cocKernelLimitsA, out cocKernelLimitsB); @@ -206,7 +216,13 @@ public override void Render(PostProcessRenderContext context) sheet.properties.SetFloat(ShaderIDs.LensCoeff, coeff); sheet.properties.SetVector(ShaderIDs.CoCKernelLimitsA, cocKernelLimitsA); sheet.properties.SetVector(ShaderIDs.CoCKernelLimitsB, cocKernelLimitsB); + sheet.properties.SetVector(ShaderIDs.MaxCoCTexUvScale, new Vector4(paddedWidth / (float)context.width, paddedHeight / (float)context.height, context.width / (float)paddedWidth, context.height / (float)paddedHeight)); sheet.properties.SetFloat(ShaderIDs.MaxCoC, maxCoC); + sheet.properties.SetVector(ShaderIDs.CoCScreen, new Vector4(context.width, context.height, 1f / context.width, 1f / context.height)); + sheet.properties.SetFloat(ShaderIDs.CoCTileXCount, paddedWidth >> maxCoCMipLevel); + sheet.properties.SetFloat(ShaderIDs.CoCTileYCount, paddedHeight >> maxCoCMipLevel); + sheet.properties.SetFloat(ShaderIDs.CoCTilePixelWidth, 1 << maxCoCMipLevel); + sheet.properties.SetFloat(ShaderIDs.CoCTilePixelHeight, 1 << maxCoCMipLevel); sheet.properties.SetFloat(ShaderIDs.RcpMaxCoC, 1f / maxCoC); sheet.properties.SetFloat(ShaderIDs.RcpAspect, 1f / aspect); @@ -236,20 +252,20 @@ public override void Render(PostProcessRenderContext context) cmd.SetGlobalTexture(ShaderIDs.CoCTex, historyWrite); } - if (useUnified) + if (useUnified || useStaticTiles) { // Downsampling CoC - context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[1], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> 1, context.height >> 1); + context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[1], 0, cocFormat, 
RenderTextureReadWrite.Linear, FilterMode.Point, paddedWidth >> 1, paddedHeight >> 1); cmd.BlitFullscreenTriangle(ShaderIDs.CoCTex, ShaderIDs.MaxCoCMips[1], sheet, (int)Pass.downsampleInitialMaxCoC); for (int i = 2; i <= maxCoCMipLevel; ++i) { - context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[i], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> i, context.height >> i); + context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[i], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, paddedWidth >> i, paddedHeight >> i); cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[i - 1], ShaderIDs.MaxCoCMips[i], sheet, (int)Pass.downsampleMaxCoC); } // Extend CoC - context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCTex, 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> maxCoCMipLevel, context.height >> maxCoCMipLevel); + context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCTex, 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, paddedWidth >> maxCoCMipLevel, paddedHeight >> maxCoCMipLevel); cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[maxCoCMipLevel], ShaderIDs.MaxCoCTex, sheet, (int)Pass.extendMaxCoC); } @@ -259,7 +275,32 @@ public override void Render(PostProcessRenderContext context) // Bokeh simulation pass context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.DepthOfFieldTemp, 0, colorFormat, RenderTextureReadWrite.Default, FilterMode.Bilinear, context.width / 2, context.height / 2); - cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, useUnified ? 
(int)Pass.BokehUnified : (int)Pass.BokehSmallKernel + (int)settings.kernelSize.value); + if (useUnified) + { + /* + int tileXCount = paddedWidth >> maxCoCMipLevel; + int tileYCount = paddedHeight >> maxCoCMipLevel; + int tileCount = tileXCount * tileYCount; + cmd.SetGlobalFloat(ShaderIDs.CoCRingCount, 2.0f); + cmd.BlitProcedural(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, (int)Pass.BokehUnified, 6, tileCount); + */ + cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, (int)Pass.BokehUnified); + } + else if (useStaticTiles) + { + int tileXCount = paddedWidth >> maxCoCMipLevel; + int tileYCount = paddedHeight >> maxCoCMipLevel; + int tileCount = tileXCount * tileYCount; + for (int i = 0; i < 4; ++i) + { + cmd.SetGlobalFloat(ShaderIDs.CoCRingCount, i + 1); + cmd.BlitProcedural(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, (int)Pass.BokehKernel1 + i, 6, tileCount); + } + } + else + { + cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, (int)Pass.BokehSmallKernel + (int)settings.kernelSize.value); + } // Postfilter pass cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTemp, ShaderIDs.DepthOfFieldTex, sheet, (int)Pass.PostFilter); diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Utils/RuntimeUtilities.cs b/com.unity.postprocessing/PostProcessing/Runtime/Utils/RuntimeUtilities.cs index 459cca0306b..246e384abb7 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Utils/RuntimeUtilities.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Utils/RuntimeUtilities.cs @@ -556,6 +556,34 @@ public static void BlitFullscreenTriangle(this CommandBuffer cmd, RenderTargetId #endif } + /// + /// Blits procedural geometry using a given material. 
+ /// + /// The command buffer to use + /// The source render target + /// The destination render target + /// The property sheet to use + /// The pass from the material to use + /// The number of instances to render + /// Should the destination target be cleared? + /// An optional viewport to consider for the blit + /// Should the depth buffer be preserved? + public static void BlitProcedural(this CommandBuffer cmd, RenderTargetIdentifier source, RenderTargetIdentifier destination, PropertySheet propertySheet, int pass, int vertexCount, int instanceCount, bool clear = false, Rect? viewport = null, bool preserveDepth = false) + { + cmd.SetGlobalTexture(ShaderIDs.MainTex, source); + var loadAction = viewport == null ? LoadAction.DontCare : LoadAction.Load; + cmd.SetRenderTargetWithLoadStoreAction(destination, loadAction, StoreAction.Store, preserveDepth ? LoadAction.Load : loadAction, StoreAction.Store); + + if (viewport != null) + cmd.SetViewport(viewport.Value); + + if (clear) + cmd.ClearRenderTarget(true, true, Color.clear); + + // TODO: detect which platforms support quads + cmd.DrawProcedural(Matrix4x4.identity, propertySheet.material, pass, MeshTopology.Triangles, vertexCount, instanceCount, propertySheet.properties); + } + /// /// Blits a fullscreen triangle from a double-wide source. 
/// diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs index 802fe4ed8e0..0a91a1c6b95 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs @@ -77,6 +77,13 @@ static class ShaderIDs internal static readonly int LensCoeff = Shader.PropertyToID("_LensCoeff"); internal static readonly int CoCKernelLimitsA = Shader.PropertyToID("_CoCKernelLimitsA"); internal static readonly int CoCKernelLimitsB = Shader.PropertyToID("_CoCKernelLimitsB"); + internal static readonly int MaxCoCTexUvScale = Shader.PropertyToID("_MaxCoCTexUvScale"); + internal static readonly int CoCRingCount = Shader.PropertyToID("_CoCRingCount"); + internal static readonly int CoCScreen = Shader.PropertyToID("_CoCScreen"); + internal static readonly int CoCTileXCount = Shader.PropertyToID("_CoCTileXCount"); + internal static readonly int CoCTileYCount = Shader.PropertyToID("_CoCTileYCount"); + internal static readonly int CoCTilePixelWidth = Shader.PropertyToID("_CoCTilePixelWidth"); + internal static readonly int CoCTilePixelHeight = Shader.PropertyToID("_CoCTilePixelHeight"); internal static readonly int MaxCoC = Shader.PropertyToID("_MaxCoC"); internal static readonly int RcpMaxCoC = Shader.PropertyToID("_RcpMaxCoC"); internal static readonly int RcpAspect = Shader.PropertyToID("_RcpAspect"); diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl index 68b279533d5..2d75e7cedd5 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl @@ -22,6 +22,13 @@ float _Distance; float _LensCoeff; // f^2 / (N * (S1 - f) * film_width * 2) half4 _CoCKernelLimitsA; half4 _CoCKernelLimitsB; +float4 
_MaxCoCTexUvScale; // (kc)rename + move more variables to half +float _CoCRingCount; +float4 _CoCScreen; +float _CoCTileXCount; +float _CoCTileYCount; +float _CoCTilePixelWidth; +float _CoCTilePixelHeight; float _MaxCoC; float _RcpMaxCoC; float _RcpAspect; @@ -150,9 +157,27 @@ half4 FragPrefilter(VaryingsDefault i) : SV_Target return half4(avg, coc); } +VaryingsDefault VertDownsampleCoC(AttributesDefault v) +{ + VaryingsDefault o; + o.vertex = float4(v.vertex.xy, 0.0, 1.0); + o.texcoord = TransformTriangleVertexToUV(v.vertex.xy); +#if defined(INITIAL_COC) + o.texcoord *= _MaxCoCTexUvScale.xy; +#endif + +#if UNITY_UV_STARTS_AT_TOP + o.texcoord = o.texcoord * float2(1.0, -1.0) + float2(0.0, 1.0); +#endif + + o.texcoordStereo = TransformStereoScreenSpaceTex(o.texcoord, 1.0); + + return o; +} + half4 FragDownsampleCoC(VaryingsDefault i) : SV_Target { - // TODO gather version + // TODO implement gather version float3 duv = _MainTex_TexelSize.xyx * float3(0.5, 0.5, -0.5); float2 uv0 = UnityStereoTransformScreenSpaceTex(i.texcoord - duv.xy); @@ -168,6 +193,7 @@ half4 FragDownsampleCoC(VaryingsDefault i) : SV_Target cocs.w = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv3).r; #if defined(INITIAL_COC) + // Storing the absolute normalized CoC is enough. 
cocs = cocs * 2.0 - 1.0; #endif cocs = abs(cocs); @@ -209,22 +235,24 @@ half4 FragExtendCoC(VaryingsDefault i) : SV_Target // Bokeh filter with disk-shaped kernels half4 FragBlur(VaryingsDefault i) : SV_Target { - half4 samp0 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.texcoordStereo); - // normalized value in range [0, 1] - half maxCoC = SAMPLE_TEXTURE2D(_MaxCoCTex, sampler_MaxCoCTex, i.texcoordStereo).r; + const half margin = _MainTex_TexelSize.y * 2; - int sampleCount = kSampleCount; + half4 samp0 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.texcoordStereo); half4 bgAcc = 0.0; // Background: far field bokeh half4 fgAcc = 0.0; // Foreground: near field bokeh - const half margin = _MainTex_TexelSize.y * 2; +#if defined(KERNEL_UNIFIED) + int sampleCount = kDiskAllKernelSizes[KERNEL_UNIFIED]; +#else + int sampleCount = kSampleCount; +#endif UNITY_LOOP for (int si = 0; si < sampleCount; si++) { #if defined(KERNEL_UNIFIED) - float2 disp = kDiskAllKernels[si] * _MaxCoC * (12.0 / 8.0); + float2 disp = kDiskAllKernels[si] * (_MaxCoC * (12.0 / 8.0)); #else float2 disp = kDiskKernel[si] * _MaxCoC; #endif @@ -270,11 +298,11 @@ half4 FragBlur(VaryingsDefault i) : SV_Target } // Bokeh filter with disk-shaped kernels -half4 FragBlurUnified(VaryingsDefault i) : SV_Target +half4 FragBlurDynamic(VaryingsDefault i) : SV_Target { half4 samp0 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.texcoordStereo); // normalized value in range [0, 1] - half maxCoC = SAMPLE_TEXTURE2D(_MaxCoCTex, sampler_MaxCoCTex, i.texcoordStereo).r; + half maxCoC = SAMPLE_TEXTURE2D(_MaxCoCTex, sampler_MaxCoCTex, i.texcoordStereo * _MaxCoCTexUvScale.zw).r; int sampleCount; @@ -298,7 +326,7 @@ half4 FragBlurUnified(VaryingsDefault i) : SV_Target UNITY_LOOP for (int si = 0; si < sampleCount; si++) { - float2 disp = kDiskAllKernels[si] * _MaxCoC * (12.0 / 8.0); + float2 disp = kDiskAllKernels[si] * (_MaxCoC * (12.0 / 8.0)); float dist = length(disp); @@ -338,21 +366,83 @@ half4 
FragBlurUnified(VaryingsDefault i) : SV_Target half alpha = saturate(fgAcc.a); half3 rgb = lerp(bgAcc.rgb, fgAcc.rgb, alpha); - /* - if (i.texcoord.x < 0.05) - rgb.r += 0.5; // (kc) + return half4(rgb, alpha); +} - if (sampleCount == 8) - rgb.r += 0.5; - if (sampleCount == 22) - rgb.g += 0.5; - if (sampleCount == 43) - rgb.b += 0.5; - if (sampleCount == 71) - rgb.rg += 0.5; - */ +struct Attributes +{ + uint vertexID : SV_VertexID; + uint instanceID : SV_InstanceID; +}; - return half4(rgb, alpha); +uint2 UnpackTileID(uint tileID) +{ + return uint2(tileID & 0xFFFF, (tileID >> 16) & 0xFFFF); +} + +// 0 - 0,1 +// 1 - 0,0 +// 2 - 1,0 +// 3 - 1,1 +float4 GetQuadVertexPosition(uint vertexID, float z = UNITY_NEAR_CLIP_VALUE) +{ + uint topBit = vertexID >> 1; + uint botBit = (vertexID & 1); + float x = topBit; + float y = 1 - (topBit + botBit) & 1; // produces 1 for indices 0,3 and 0 for 1,2 + float4 pos = float4(x, y, z, 1.0); + return pos; +} + +VaryingsDefault VertexTiling(Attributes input) +{ + uint2 tileCoord = uint2(input.instanceID % (uint)_CoCTileXCount, input.instanceID / (uint)_CoCTileXCount); // (kc) stereo mode? + // normalized value in range [0, 1] + half maxCoC = LOAD_TEXTURE2D(_MaxCoCTex, _MaxCoCTex_TexelSize, tileCoord).x; + + bool shouldDiscard; + + UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[0]) + shouldDiscard = _CoCRingCount != 0; + // margin adjustment later in the shader code artifically expand bokeh by 4px in fullscreen units (1 extra ring), we cannot have small bokeh as a result! 
+ else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[1]) + shouldDiscard = _CoCRingCount != 1+1; + else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[2]) + shouldDiscard = _CoCRingCount != 2+1; + else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[3]) + shouldDiscard = _CoCRingCount != 3+1; + else + shouldDiscard = _CoCRingCount != 4; + + VaryingsDefault output; + + [branch] if (shouldDiscard) + { + output.vertex = float4(-2, -2, -2, 1); + output.texcoord = 0.0.xx; + output.texcoordStereo = 0.0.xx; +#if STEREO_INSTANCING_ENABLED + output.stereoTargetEyeIndex = 0; +#endif + return output; + } + + // This handles both "real quad" and "2 triangles" cases: remaps {0, 1, 2, 3, 4, 5} into {0, 1, 2, 3, 0, 2}. + uint quadIndex = (input.vertexID & 0x03) + (input.vertexID >> 2) * (input.vertexID & 0x01); + float2 pp = GetQuadVertexPosition(quadIndex).xy; + uint2 pixelCoord = tileCoord * uint2(_CoCTilePixelWidth, _CoCTilePixelHeight); + pixelCoord += uint2(pp.xy * uint2(_CoCTilePixelWidth, _CoCTilePixelHeight)); + pixelCoord.y = _CoCScreen.y - pixelCoord.y; + float2 clipCoord = (pixelCoord * _CoCScreen.zw) * 2.0 - 1.0; + + output.vertex = float4(clipCoord, 0, 1); + output.texcoord = clipCoord * 0.5 + 0.5; + #if UNITY_UV_STARTS_AT_TOP + output.texcoord = output.texcoord * float2(1.0, -1.0) + float2(0.0, 1.0); + #endif + output.texcoordStereo = TransformStereoScreenSpaceTex(output.texcoord, 1.0); + + return output; } // Postfilter blur diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader index fc7c55822bb..e31838aff22 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader @@ -50,7 +50,7 @@ Shader "Hidden/PostProcessing/DepthOfField" HLSLPROGRAM #pragma target 3.5 - #pragma vertex VertDefault + #pragma vertex VertDownsampleCoC #pragma fragment 
FragDownsampleCoC #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL @@ -134,18 +134,70 @@ Shader "Hidden/PostProcessing/DepthOfField" Pass // 10 { - Name "Bokeh Filter (unified)" + Name "Bokeh Filter (dynamic)" HLSLPROGRAM #pragma target 3.5 #pragma vertex VertDefault - #pragma fragment FragBlurUnified - #define KERNEL_UNIFIED + #pragma fragment FragBlurDynamic + #define KERNEL_UNIFIED 4 + #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" + ENDHLSL + } + + Pass // 11 + { + Name "Bokeh Filter (1 ring)" + + HLSLPROGRAM + #pragma target 3.5 + #pragma vertex VertexTiling + #pragma fragment FragBlur + #define KERNEL_UNIFIED 1 + #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" + ENDHLSL + } + + Pass // 12 + { + Name "Bokeh Filter (2 rings)" + + HLSLPROGRAM + #pragma target 3.5 + #pragma vertex VertexTiling + #pragma fragment FragBlur + #define KERNEL_UNIFIED 2 + #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" + ENDHLSL + } + + Pass // 13 + { + Name "Bokeh Filter (3 rings)" + + HLSLPROGRAM + #pragma target 3.5 + #pragma vertex VertexTiling + #pragma fragment FragBlur + #define KERNEL_UNIFIED 3 + #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" + ENDHLSL + } + + Pass // 14 + { + Name "Bokeh Filter (4 rings)" + + HLSLPROGRAM + #pragma target 3.5 + #pragma vertex VertexTiling + #pragma fragment FragBlur + #define KERNEL_UNIFIED 4 #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL } - Pass // 11 + Pass // 15 { Name "Postfilter" @@ -157,7 +209,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 12 + Pass // 16 { Name "Combine" @@ -169,7 +221,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 13 + Pass // 17 { Name "Debug Overlay" From 
d9da953f997470f00cff2df5dbfcaf585962dbab Mon Sep 17 00:00:00 2001
From: Kay Chang
Date: Mon, 15 Apr 2024 23:02:14 -0400
Subject: [PATCH 5/5] Shader code optimisation (7% faster), manual loop unrolling (15% faster).

Precompute the kernel scale (_KernelScale) and the blend margin with its
reciprocal (_MarginFactors) on the CPU, store each sample's radius in
kDiskAllKernels.z so the shader no longer calls length(), and unroll
FragBlurDynamic one ring at a time.

The unrolled ring blocks cover kernel entries [0, sampleCount), i.e.
1..7, 8..21, 22..42 and 43..70, matching
kDiskAllKernelSizes = { 1, 8, 22, 43, 71, ... } and the loop they replace.
---
 .../Runtime/Effects/DepthOfField.cs           |   4 +
 .../PostProcessing/Runtime/Utils/ShaderIDs.cs |   2 +
 .../Shaders/Builtins/DepthOfField.hlsl        | 166 ++++++++--
 .../Shaders/Builtins/DiskKernels.hlsl         | 311 +++++++++---------
 4 files changed, 293 insertions(+), 190 deletions(-)

diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs
index 32a118e0438..93d5f0d19c1 100644
--- a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs
+++ b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs
@@ -217,6 +217,10 @@ public override void Render(PostProcessRenderContext context)
             sheet.properties.SetVector(ShaderIDs.CoCKernelLimitsA, cocKernelLimitsA);
             sheet.properties.SetVector(ShaderIDs.CoCKernelLimitsB, cocKernelLimitsB);
             sheet.properties.SetVector(ShaderIDs.MaxCoCTexUvScale, new Vector4(paddedWidth / (float)context.width, paddedHeight / (float)context.height, context.width / (float)paddedWidth, context.height / (float)paddedHeight));
+            // xy: UV offset scale for unified-kernel samples (x is divided by the aspect ratio), z: radial distance scale.
+            sheet.properties.SetVector(ShaderIDs.KernelScale, new Vector4(maxCoC * (12f / 8f) / aspect, maxCoC * (12f / 8f), maxCoC * (12f / 8f), 0f)); // (kc) hardcoded for 4 rings
+            // x: blend margin of 2 / (height >> 1), y: its reciprocal, so the shader multiplies instead of dividing.
+            sheet.properties.SetVector(ShaderIDs.MarginFactors, new Vector4(2f / (context.height >> 1), (context.height >> 1) / 2f, 0f, 0f));
             sheet.properties.SetFloat(ShaderIDs.MaxCoC, maxCoC);
             sheet.properties.SetVector(ShaderIDs.CoCScreen, new Vector4(context.width, context.height, 1f / context.width, 1f / context.height));
             sheet.properties.SetFloat(ShaderIDs.CoCTileXCount, paddedWidth >> maxCoCMipLevel);
diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs
index 0a91a1c6b95..37a3486f248 100644
--- a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs
+++ b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs
@@ -84,6 +84,8 @@ static class ShaderIDs
         internal static readonly int CoCTileYCount = Shader.PropertyToID("_CoCTileYCount");
         internal static readonly int CoCTilePixelWidth = Shader.PropertyToID("_CoCTilePixelWidth");
         internal static readonly int CoCTilePixelHeight = Shader.PropertyToID("_CoCTilePixelHeight");
+        internal static readonly int KernelScale = Shader.PropertyToID("_KernelScale");
+        internal static readonly int MarginFactors = Shader.PropertyToID("_MarginFactors");
         internal static readonly int MaxCoC = Shader.PropertyToID("_MaxCoC");
         internal static readonly int RcpMaxCoC = Shader.PropertyToID("_RcpMaxCoC");
         internal static readonly int RcpAspect = Shader.PropertyToID("_RcpAspect");
diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl
index 2d75e7cedd5..0b5c6a85241 100644
--- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl
+++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl
@@ -29,6 +29,8 @@ float _CoCTileXCount;
 float _CoCTileYCount;
 float _CoCTilePixelWidth;
 float _CoCTilePixelHeight;
+half3 _KernelScale;   // xy: UV offset scale per unified-kernel sample, z: radial distance scale
+half2 _MarginFactors; // x: blend margin, y: 1 / margin
 float _MaxCoC;
 float _RcpMaxCoC;
 float _RcpAspect;
@@ -231,12 +233,43 @@ half4 FragExtendCoC(VaryingsDefault i) : SV_Target
 
     return half4(maxCoC, 0.0, 0.0, 0.0);
 }
+
+// Accumulates the contribution of one unified-kernel sample into the
+// far-field (bgAcc) and near-field (fgAcc) bokeh accumulators.
+//   si       - index into kDiskAllKernels (xy: unit disk offset, z: its radius)
+//   samp0    - center sample; its alpha holds the center CoC
+//   texcoord - center UV before the stereo transform is applied
+// _KernelScale.xy already folds in the aspect correction, so the scaled
+// offset is used directly as the UV displacement.
+void AccumSample(int si, half4 samp0, float2 texcoord, inout half4 bgAcc, inout half4 fgAcc)
+{
+    half2 disp = kDiskAllKernels[si].xy * _KernelScale.xy;
+    half dist = kDiskAllKernels[si].z * _KernelScale.z;
+    half2 duv = disp;
+
+    half4 samp = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, UnityStereoTransformScreenSpaceTex(texcoord + duv));
+
+    // BG: Compare CoC of the current sample and the center sample
+    // and select smaller one.
+    half bgCoC = max(min(samp0.a, samp.a), 0.0);
+
+    // Compare the CoC to the sample distance.
+    // Add a small margin to smooth out.
+    half bgWeight = saturate((bgCoC - dist + _MarginFactors.x) * _MarginFactors.y);
+    half fgWeight = saturate((-samp.a - dist + _MarginFactors.x) * _MarginFactors.y);
+
+    // Cut influence from focused areas because they're darkened by CoC
+    // premultiplying. This is only needed for near field.
+    fgWeight *= step(_MainTex_TexelSize.y, -samp.a);
+
+    // Accumulation
+    bgAcc += half4(samp.rgb, 1.0) * bgWeight;
+    fgAcc += half4(samp.rgb, 1.0) * fgWeight;
+}
 
 // Bokeh filter with disk-shaped kernels
 half4 FragBlur(VaryingsDefault i) : SV_Target
 {
-    const half margin = _MainTex_TexelSize.y * 2;
-
     half4 samp0 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.texcoordStereo);
 
     half4 bgAcc = 0.0; // Background: far field bokeh
@@ -244,21 +277,26 @@ half4 FragBlur(VaryingsDefault i) : SV_Target
 
 #if defined(KERNEL_UNIFIED)
     int sampleCount = kDiskAllKernelSizes[KERNEL_UNIFIED];
+    half rcpSampleCount = kDiskAllKernelRcpSizes[KERNEL_UNIFIED];
 #else
     int sampleCount = kSampleCount;
+    half rcpSampleCount = 1.0 / kSampleCount;
 #endif
 
-    UNITY_LOOP
+    // sampleCount comes from compile-time constants in both branches, so request a full unroll.
+    UNITY_UNROLL
     for (int si = 0; si < sampleCount; si++)
     {
 #if defined(KERNEL_UNIFIED)
-        float2 disp = kDiskAllKernels[si] * (_MaxCoC * (12.0 / 8.0));
+        half2 disp = kDiskAllKernels[si].xy * _KernelScale.xy;
+        half dist = kDiskAllKernels[si].z * _KernelScale.z;
+        half2 duv = disp;
 #else
-        float2 disp = kDiskKernel[si] * _MaxCoC;
+        half2 disp = kDiskKernel[si] * _MaxCoC;
+        half dist = length(disp);
+        half2 duv = half2(disp.x * _RcpAspect, disp.y);
 #endif
 
-        float dist = length(disp);
-        float2 duv = float2(disp.x * _RcpAspect, disp.y);
         half4 samp = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, UnityStereoTransformScreenSpaceTex(i.texcoord + duv));
 
         // BG: Compare CoC of the current sample and the center sample
@@ -267,8 +305,8 @@ half4 FragBlur(VaryingsDefault i) : SV_Target
 
         // Compare the CoC to the sample distance.
         // Add a small margin to smooth out.
-        half bgWeight = saturate((bgCoC - dist + margin) / margin);
-        half fgWeight = saturate((-samp.a - dist + margin) / margin);
+        half bgWeight = saturate((bgCoC - dist + _MarginFactors.x) * _MarginFactors.y);
+        half fgWeight = saturate((-samp.a - dist + _MarginFactors.x) * _MarginFactors.y);
 
         // Cut influence from focused areas because they're darkened by CoC
         // premultiplying. This is only needed for near field.
@@ -288,7 +326,7 @@ half4 FragBlur(VaryingsDefault i) : SV_Target
     bgAcc.a = smoothstep(_MainTex_TexelSize.y, _MainTex_TexelSize.y * 2.0, samp0.a);
 
     // FG: Normalize the total of the weights.
-    fgAcc.a *= PI / sampleCount;
+    fgAcc.a *= PI * rcpSampleCount;
 
     // Alpha premultiplying
     half alpha = saturate(fgAcc.a);
@@ -318,37 +356,95 @@ half4 FragBlurDynamic(VaryingsDefault i) : SV_Target
     else
         sampleCount = kDiskAllKernelSizes[4];
 
-    const half margin = _MainTex_TexelSize.y * 2;
-
     half4 bgAcc = 0.0; // Background: far field bokeh
     half4 fgAcc = 0.0; // Foreground: near field bokeh
 
-    UNITY_LOOP
-    for (int si = 0; si < sampleCount; si++)
-    {
-        float2 disp = kDiskAllKernels[si] * (_MaxCoC * (12.0 / 8.0));
-
-        float dist = length(disp);
-
-        float2 duv = float2(disp.x * _RcpAspect, disp.y);
-        half4 samp = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, UnityStereoTransformScreenSpaceTex(i.texcoord + duv));
-
-        // BG: Compare CoC of the current sample and the center sample
-        // and select smaller one.
-        half bgCoC = max(min(samp0.a, samp.a), 0.0);
+    // Entry 0 is the center tap and is always taken; each block below adds one whole ring, i.e. entries [0, sampleCount).
+    AccumSample(0, samp0, i.texcoord, bgAcc, fgAcc);
 
-        // Compare the CoC to the sample distance.
-        // Add a small margin to smooth out.
-        half bgWeight = saturate((bgCoC - dist + margin) / margin);
-        half fgWeight = saturate((-samp.a - dist + margin) / margin);
+    UNITY_BRANCH if (sampleCount >= 8) // ring 1: entries 1..7
+    {
+        AccumSample( 1, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample( 2, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample( 3, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample( 4, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample( 5, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample( 6, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample( 7, samp0, i.texcoord, bgAcc, fgAcc);
+    }
+    UNITY_BRANCH if (sampleCount >= 22) // ring 2: entries 8..21
+    {
+        AccumSample( 8, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample( 9, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(10, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(11, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(12, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(13, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(14, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(15, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(16, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(17, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(18, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(19, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(20, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(21, samp0, i.texcoord, bgAcc, fgAcc);
+    }
 
-        // Cut influence from focused areas because they're darkened by CoC
-        // premultiplying. This is only needed for near field.
-        fgWeight *= step(_MainTex_TexelSize.y, -samp.a);
+    UNITY_BRANCH if (sampleCount >= 43) // ring 3: entries 22..42
+    {
+        AccumSample(22, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(23, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(24, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(25, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(26, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(27, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(28, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(29, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(30, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(31, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(32, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(33, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(34, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(35, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(36, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(37, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(38, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(39, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(40, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(41, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(42, samp0, i.texcoord, bgAcc, fgAcc);
+    }
 
-        // Accumulation
-        bgAcc += half4(samp.rgb, 1.0) * bgWeight;
-        fgAcc += half4(samp.rgb, 1.0) * fgWeight;
+    UNITY_BRANCH if (sampleCount >= 71) // ring 4: entries 43..70
+    {
+        AccumSample(43, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(44, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(45, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(46, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(47, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(48, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(49, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(50, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(51, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(52, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(53, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(54, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(55, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(56, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(57, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(58, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(59, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(60, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(61, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(62, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(63, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(64, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(65, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(66, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(67, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(68, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(69, samp0, i.texcoord, bgAcc, fgAcc);
+        AccumSample(70, samp0, i.texcoord, bgAcc, fgAcc);
     }
 
     // Get the weighted average.
diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl
index 37f6a96693d..6956c3d9eb8 100644
--- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl
+++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl
@@ -203,161 +203,162 @@ static const float2 kDiskKernel[kSampleCount] = {
 
 static const int kDiskAllKernelSizes[7] = { 1, 8, 22, 43, 71, 106, 148 };
 
-static const float2 kDiskAllKernels[148] = {
-float2(0, 0),
-// ring 1 index=1
-float2(0.186046511627907, 0),
-float2(0.115998102671392, 0.145457019994052),
-float2(-0.0413992435267562, 0.181381937150107),
-float2(-0.16762211495859, 0.0807225561148946),
-float2(-0.16762211495859, -0.0807225561148945),
-float2(-0.0413992435267562, -0.181381937150107),
-float2(0.115998102671392, -0.145457019994052),
-// ring 2 index=8
-float2(0.348837209302326, 0),
-float2(0.314291465547356, 0.151354792715427),
-float2(0.217496442508861, 0.272731912488848),
-float2(0.0776235816126678, 0.34009113215645),
-float2(-0.0776235816126678, 0.34009113215645),
-float2(-0.217496442508861, 0.272731912488848), -float2(-0.314291465547355, 0.151354792715427), -float2(-0.348837209302326, 4.27202371795588E-17), -float2(-0.314291465547356, -0.151354792715427), -float2(-0.217496442508861, -0.272731912488848), -float2(-0.0776235816126679, -0.34009113215645), -float2(0.0776235816126674, -0.34009113215645), -float2(0.21749644250886, -0.272731912488848), -float2(0.314291465547356, -0.151354792715427), -// ring 3 index=22 -float2(0.511627906976744, 0), -float2(0.488897714588258, 0.150804972954416), -float2(0.422726814766323, 0.288210262265109), -float2(0.318994782346329, 0.400006804983643), -float2(0.186918663629318, 0.47626098767843), -float2(0.0382340013697985, 0.510197291581069), -float2(-0.113847919698579, 0.498800327162793), -float2(-0.255813953488372, 0.443082764726922), -float2(-0.375049794889679, 0.347995354208377), -float2(-0.460960816136121, 0.22198702931596), -float2(-0.505913445975647, 0.0762541826947871), -float2(-0.505913445975647, -0.0762541826947867), -float2(-0.460960816136121, -0.22198702931596), -float2(-0.375049794889679, -0.347995354208377), -float2(-0.255813953488372, -0.443082764726922), -float2(-0.11384791969858, -0.498800327162793), -float2(0.0382340013697985, -0.510197291581069), -float2(0.186918663629319, -0.47626098767843), -float2(0.318994782346329, -0.400006804983643), -float2(0.422726814766323, -0.288210262265109), -float2(0.488897714588258, -0.150804972954416), -// ring 4 index=43 -float2(0.674418604651163, 0), -float2(0.657509522169137, 0.150072257784491), -float2(0.607630166724887, 0.292619265916493), -float2(0.527281697478439, 0.420493122183797), -float2(0.420493122183797, 0.527281697478439), -float2(0.292619265916493, 0.607630166724887), -float2(0.150072257784491, 0.657509522169137), -float2(4.12962292735735E-17, 0.674418604651163), -float2(-0.150072257784491, 0.657509522169137), -float2(-0.292619265916493, 0.607630166724887), -float2(-0.420493122183797, 0.527281697478439), 
-float2(-0.527281697478438, 0.420493122183797), -float2(-0.607630166724887, 0.292619265916493), -float2(-0.657509522169137, 0.150072257784491), -float2(-0.674418604651163, 8.25924585471471E-17), -float2(-0.657509522169137, -0.150072257784491), -float2(-0.607630166724887, -0.292619265916493), -float2(-0.527281697478439, -0.420493122183797), -float2(-0.420493122183797, -0.527281697478439), -float2(-0.292619265916493, -0.607630166724887), -float2(-0.150072257784491, -0.657509522169137), -float2(-1.23888687820721E-16, -0.674418604651163), -float2(0.15007225778449, -0.657509522169137), -float2(0.292619265916493, -0.607630166724887), -float2(0.420493122183797, -0.527281697478439), -float2(0.527281697478439, -0.420493122183797), -float2(0.607630166724887, -0.292619265916492), -float2(0.657509522169137, -0.150072257784491), -// ring 5 index=71 -float2(0.837209302325581, 0), -float2(0.823755004408155, 0.149489493319789), -float2(0.783824542861175, 0.294174271323915), -float2(0.718701315573655, 0.429404046200294), -float2(0.630478436654186, 0.550832421716969), -float2(0.521991462021265, 0.654556589973234), -float2(0.396727252302976, 0.737242770856804), -float2(0.258711902267398, 0.796233362479663), -float2(0.112381338824084, 0.829632358689434), -float2(-0.0375612533167101, 0.836366288267147), -float2(-0.186296595870403, 0.81621871717548), -float2(-0.329044212547471, 0.769837204926796), -float2(-0.461216077494783, 0.698712491487602), -float2(-0.578564078221561, 0.605130583669444), -float2(-0.677316553430188, 0.492099280989047), -float2(-0.754299517313653, 0.363251502517026), -float2(-0.807038674070947, 0.222728521869775), -float2(-0.833838943809968, 0.0750468632679909), -float2(-0.833838943809968, -0.0750468632679907), -float2(-0.807038674070947, -0.222728521869774), -float2(-0.754299517313653, -0.363251502517025), -float2(-0.677316553430189, -0.492099280989047), -float2(-0.578564078221562, -0.605130583669444), -float2(-0.461216077494784, -0.698712491487602), 
-float2(-0.329044212547471, -0.769837204926796), -float2(-0.186296595870403, -0.81621871717548), -float2(-0.0375612533167103, -0.836366288267147), -float2(0.112381338824084, -0.829632358689434), -float2(0.258711902267398, -0.796233362479664), -float2(0.396727252302976, -0.737242770856804), -float2(0.521991462021265, -0.654556589973234), -float2(0.630478436654186, -0.550832421716969), -float2(0.718701315573655, -0.429404046200294), -float2(0.783824542861175, -0.294174271323915), -float2(0.823755004408155, -0.149489493319789), -// ring 6 index=106 -float2(1, 0), -float2(0.988830826225129, 0.149042266176174), -float2(0.955572805786141, 0.294755174410904), -float2(0.900968867902419, 0.433883739117558), -float2(0.826238774315995, 0.563320058063622), -float2(0.733051871829826, 0.680172737770919), -float2(0.623489801858734, 0.78183148246803), -float2(0.5, 0.866025403784439), -float2(0.365341024366395, 0.930873748644204), -float2(0.222520933956314, 0.974927912181824), -float2(0.0747300935864244, 0.99720379718118), -float2(-0.074730093586424, 0.99720379718118), -float2(-0.222520933956314, 0.974927912181824), -float2(-0.365341024366395, 0.930873748644204), -float2(-0.5, 0.866025403784439), -float2(-0.623489801858733, 0.78183148246803), -float2(-0.733051871829826, 0.680172737770919), -float2(-0.826238774315995, 0.563320058063622), -float2(-0.900968867902419, 0.433883739117558), -float2(-0.955572805786141, 0.294755174410905), -float2(-0.988830826225129, 0.149042266176175), -float2(-1, 1.22464679914735E-16), -float2(-0.988830826225129, -0.149042266176174), -float2(-0.955572805786141, -0.294755174410904), -float2(-0.900968867902419, -0.433883739117558), -float2(-0.826238774315995, -0.563320058063622), -float2(-0.733051871829826, -0.680172737770919), -float2(-0.623489801858734, -0.78183148246803), -float2(-0.5, -0.866025403784438), -float2(-0.365341024366395, -0.930873748644204), -float2(-0.222520933956315, -0.974927912181824), -float2(-0.0747300935864247, -0.99720379718118), 
-float2(0.0747300935864244, -0.99720379718118), -float2(0.222520933956314, -0.974927912181824), -float2(0.365341024366395, -0.930873748644204), -float2(0.499999999999999, -0.866025403784439), -float2(0.623489801858733, -0.78183148246803), -float2(0.733051871829827, -0.680172737770919), -float2(0.826238774315994, -0.563320058063623), -float2(0.900968867902419, -0.433883739117558), -float2(0.955572805786141, -0.294755174410905), -float2(0.988830826225128, -0.149042266176175), +static const half kDiskAllKernelRcpSizes[7] = { 1, 1.0/8, 1.0/22, 1.0/43, 1.0/71, 1.0/106, 1.0/148 }; +static const half3 kDiskAllKernels[148] = { + half3(0, 0, 0), + // ring 1 index=1 + half3(0.186046511627907, 0, 0.186046511627907), + half3(0.115998102671392, 0.145457019994052, 0.186046511627907), + half3(-0.0413992435267562, 0.181381937150107, 0.186046511627907), + half3(-0.16762211495859, 0.0807225561148946, 0.186046511627907), + half3(-0.16762211495859, -0.0807225561148945, 0.186046511627907), + half3(-0.0413992435267562, -0.181381937150107, 0.186046511627907), + half3(0.115998102671392, -0.145457019994052, 0.186046511627907), + // ring 2 index=8 + half3(0.348837209302326, 0, 0.348837209302326), + half3(0.314291465547356, 0.151354792715427, 0.348837209302326), + half3(0.217496442508861, 0.272731912488848, 0.348837209302326), + half3(0.0776235816126678, 0.34009113215645, 0.348837209302326), + half3(-0.0776235816126678, 0.34009113215645, 0.348837209302326), + half3(-0.217496442508861, 0.272731912488848, 0.348837209302326), + half3(-0.314291465547355, 0.151354792715427, 0.348837209302326), + half3(-0.348837209302326, 4.27202371795588E-17, 0.348837209302326), + half3(-0.314291465547356, -0.151354792715427, 0.348837209302326), + half3(-0.217496442508861, -0.272731912488848, 0.348837209302326), + half3(-0.0776235816126679, -0.34009113215645, 0.348837209302326), + half3(0.0776235816126674, -0.34009113215645, 0.348837209302326), + half3(0.21749644250886, -0.272731912488848, 0.348837209302326), + 
half3(0.314291465547356, -0.151354792715427, 0.348837209302326), + // ring 3 index=22 + half3(0.511627906976744, 0, 0.511627906976744), + half3(0.488897714588258, 0.150804972954416, 0.511627906976744), + half3(0.422726814766323, 0.288210262265109, 0.511627906976744), + half3(0.318994782346329, 0.400006804983643, 0.511627906976744), + half3(0.186918663629318, 0.47626098767843, 0.511627906976744), + half3(0.0382340013697985, 0.510197291581069, 0.511627906976744), + half3(-0.113847919698579, 0.498800327162793, 0.511627906976744), + half3(-0.255813953488372, 0.443082764726922, 0.511627906976744), + half3(-0.375049794889679, 0.347995354208377, 0.511627906976744), + half3(-0.460960816136121, 0.22198702931596, 0.511627906976744), + half3(-0.505913445975647, 0.0762541826947871, 0.511627906976744), + half3(-0.505913445975647, -0.0762541826947867, 0.511627906976744), + half3(-0.460960816136121, -0.22198702931596, 0.511627906976744), + half3(-0.375049794889679, -0.347995354208377, 0.511627906976744), + half3(-0.255813953488372, -0.443082764726922, 0.511627906976744), + half3(-0.11384791969858, -0.498800327162793, 0.511627906976744), + half3(0.0382340013697985, -0.510197291581069, 0.511627906976744), + half3(0.186918663629319, -0.47626098767843, 0.511627906976744), + half3(0.318994782346329, -0.400006804983643, 0.511627906976744), + half3(0.422726814766323, -0.288210262265109, 0.511627906976744), + half3(0.488897714588258, -0.150804972954416, 0.511627906976744), + // ring 4 index=43 + half3(0.674418604651163, 0, 0.674418604651163), + half3(0.657509522169137, 0.150072257784491, 0.674418604651163), + half3(0.607630166724887, 0.292619265916493, 0.674418604651163), + half3(0.527281697478439, 0.420493122183797, 0.674418604651163), + half3(0.420493122183797, 0.527281697478439, 0.674418604651163), + half3(0.292619265916493, 0.607630166724887, 0.674418604651163), + half3(0.150072257784491, 0.657509522169137, 0.674418604651163), + half3(4.12962292735735E-17, 0.674418604651163, 
0.674418604651163), + half3(-0.150072257784491, 0.657509522169137, 0.674418604651163), + half3(-0.292619265916493, 0.607630166724887, 0.674418604651163), + half3(-0.420493122183797, 0.527281697478439, 0.674418604651163), + half3(-0.527281697478438, 0.420493122183797, 0.674418604651163), + half3(-0.607630166724887, 0.292619265916493, 0.674418604651163), + half3(-0.657509522169137, 0.150072257784491, 0.674418604651163), + half3(-0.674418604651163, 8.25924585471471E-17, 0.674418604651163), + half3(-0.657509522169137, -0.150072257784491, 0.674418604651163), + half3(-0.607630166724887, -0.292619265916493, 0.674418604651163), + half3(-0.527281697478439, -0.420493122183797, 0.674418604651163), + half3(-0.420493122183797, -0.527281697478439, 0.674418604651163), + half3(-0.292619265916493, -0.607630166724887, 0.674418604651163), + half3(-0.150072257784491, -0.657509522169137, 0.674418604651163), + half3(-1.23888687820721E-16, -0.674418604651163, 0.674418604651163), + half3(0.15007225778449, -0.657509522169137, 0.674418604651163), + half3(0.292619265916493, -0.607630166724887, 0.674418604651163), + half3(0.420493122183797, -0.527281697478439, 0.674418604651163), + half3(0.527281697478439, -0.420493122183797, 0.674418604651163), + half3(0.607630166724887, -0.292619265916492, 0.674418604651163), + half3(0.657509522169137, -0.150072257784491, 0.674418604651163), + // ring 5 index=71 + half3(0.837209302325581, 0, 0.837209302325581), + half3(0.823755004408155, 0.149489493319789, 0.837209302325581), + half3(0.783824542861175, 0.294174271323915, 0.837209302325581), + half3(0.718701315573655, 0.429404046200294, 0.837209302325581), + half3(0.630478436654186, 0.550832421716969, 0.837209302325581), + half3(0.521991462021265, 0.654556589973234, 0.837209302325581), + half3(0.396727252302976, 0.737242770856804, 0.837209302325581), + half3(0.258711902267398, 0.796233362479663, 0.837209302325581), + half3(0.112381338824084, 0.829632358689434, 0.837209302325581), + half3(-0.0375612533167101, 
0.836366288267147, 0.837209302325581), + half3(-0.186296595870403, 0.81621871717548, 0.837209302325581), + half3(-0.329044212547471, 0.769837204926796, 0.837209302325581), + half3(-0.461216077494783, 0.698712491487602, 0.837209302325581), + half3(-0.578564078221561, 0.605130583669444, 0.837209302325581), + half3(-0.677316553430188, 0.492099280989047, 0.837209302325581), + half3(-0.754299517313653, 0.363251502517026, 0.837209302325581), + half3(-0.807038674070947, 0.222728521869775, 0.837209302325581), + half3(-0.833838943809968, 0.0750468632679909, 0.837209302325581), + half3(-0.833838943809968, -0.0750468632679907, 0.837209302325581), + half3(-0.807038674070947, -0.222728521869774, 0.837209302325581), + half3(-0.754299517313653, -0.363251502517025, 0.837209302325581), + half3(-0.677316553430189, -0.492099280989047, 0.837209302325581), + half3(-0.578564078221562, -0.605130583669444, 0.837209302325581), + half3(-0.461216077494784, -0.698712491487602, 0.837209302325581), + half3(-0.329044212547471, -0.769837204926796, 0.837209302325581), + half3(-0.186296595870403, -0.81621871717548, 0.837209302325582), + half3(-0.0375612533167103, -0.836366288267147, 0.837209302325581), + half3(0.112381338824084, -0.829632358689434, 0.837209302325581), + half3(0.258711902267398, -0.796233362479664, 0.837209302325581), + half3(0.396727252302976, -0.737242770856804, 0.837209302325581), + half3(0.521991462021265, -0.654556589973234, 0.837209302325581), + half3(0.630478436654186, -0.550832421716969, 0.837209302325581), + half3(0.718701315573655, -0.429404046200294, 0.837209302325581), + half3(0.783824542861175, -0.294174271323915, 0.837209302325581), + half3(0.823755004408155, -0.149489493319789, 0.837209302325581), + // ring 6 index=106 + half3(1, 0, 1), + half3(0.988830826225129, 0.149042266176174, 1), + half3(0.955572805786141, 0.294755174410904, 1), + half3(0.900968867902419, 0.433883739117558, 1), + half3(0.826238774315995, 0.563320058063622, 1), + half3(0.733051871829826, 
0.680172737770919, 1), + half3(0.623489801858734, 0.78183148246803, 1), + half3(0.5, 0.866025403784439, 1), + half3(0.365341024366395, 0.930873748644204, 1), + half3(0.222520933956314, 0.974927912181824, 1), + half3(0.0747300935864244, 0.99720379718118, 1), + half3(-0.074730093586424, 0.99720379718118, 1), + half3(-0.222520933956314, 0.974927912181824, 1), + half3(-0.365341024366395, 0.930873748644204, 1), + half3(-0.5, 0.866025403784439, 1), + half3(-0.623489801858733, 0.78183148246803, 1), + half3(-0.733051871829826, 0.680172737770919, 1), + half3(-0.826238774315995, 0.563320058063622, 1), + half3(-0.900968867902419, 0.433883739117558, 1), + half3(-0.955572805786141, 0.294755174410905, 1), + half3(-0.988830826225129, 0.149042266176175, 1), + half3(-1, 1.22464679914735E-16, 1), + half3(-0.988830826225129, -0.149042266176174, 1), + half3(-0.955572805786141, -0.294755174410904, 1), + half3(-0.900968867902419, -0.433883739117558, 1), + half3(-0.826238774315995, -0.563320058063622, 1), + half3(-0.733051871829826, -0.680172737770919, 1), + half3(-0.623489801858734, -0.78183148246803, 1), + half3(-0.5, -0.866025403784438, 1), + half3(-0.365341024366395, -0.930873748644204, 1), + half3(-0.222520933956315, -0.974927912181824, 1), + half3(-0.0747300935864247, -0.99720379718118, 1), + half3(0.0747300935864244, -0.99720379718118, 1), + half3(0.222520933956314, -0.974927912181824, 1), + half3(0.365341024366395, -0.930873748644204, 1), + half3(0.499999999999999, -0.866025403784439, 1), + half3(0.623489801858733, -0.78183148246803, 1), + half3(0.733051871829827, -0.680172737770919, 1), + half3(0.826238774315994, -0.563320058063623, 1), + half3(0.900968867902419, -0.433883739117558, 1), + half3(0.955572805786141, -0.294755174410905, 1), + half3(0.988830826225128, -0.149042266176175, 1), }; #endif // UNITY_POSTFX_DISK_KERNELS