|
| 1 | +using System; |
| 2 | + |
| 3 | +#if HWINTRINSICS |
| 4 | +using System.Runtime.Intrinsics; |
| 5 | +using System.Runtime.Intrinsics.X86; |
| 6 | +using System.Runtime.InteropServices; |
| 7 | +using System.Runtime.CompilerServices; |
| 8 | +#endif |
| 9 | + |
namespace PhotoSauce.MagicScaler.Transforms
{
    /// <summary>
    /// Pixel source that returns the pixels of an underlying source with a second ("over") source
    /// drawn on top of it at a fixed offset.
    /// </summary>
    /// <remarks>
    /// Only formats with 4 unsigned-integer channels packed in 32 bits per pixel are accepted, and both sources
    /// must share the same format. When constructed with <c>alpha</c> set, overlay pixels whose top byte
    /// (<c>pixel &gt;&gt; 24</c>) is zero are skipped so the underlying pixel shows through; every other
    /// overlay pixel replaces the underlying pixel outright (no partial blending is performed).
    /// </remarks>
    internal class OverlayTransform : PixelSource, IDisposable
    {
        const int bytesPerPixel = 4;

        private readonly PixelSource overSource;
        private readonly int offsX, offsY;
        private readonly bool passthrough;

        // Scratch buffer holding one overlay row; only rented when alpha handling is requested.
        // A null Array means the overlay is copied straight into the output buffer.
        private ArraySegment<byte> lineBuff;

        /// <summary>Creates an overlay of <paramref name="over"/> on top of <paramref name="source"/>.</summary>
        /// <param name="source">The underlying (background) source.</param>
        /// <param name="over">The source drawn on top; must have the same pixel format as <paramref name="source"/>.</param>
        /// <param name="left">X offset of the overlay within the underlying source.</param>
        /// <param name="top">Y offset of the overlay within the underlying source.</param>
        /// <param name="alpha">When true, overlay pixels with a zero top byte are treated as transparent.</param>
        /// <param name="replay">
        /// When true, rows that are completely covered by the overlay are not read from the underlying source;
        /// whatever the caller's buffer already contains is kept beneath the overlay.
        /// </param>
        /// <exception cref="NotSupportedException">
        /// The pixel format is not 4-channel/32-bit unsigned integer, or the two sources differ in format.
        /// </exception>
        public OverlayTransform(PixelSource source, PixelSource over, int left, int top, bool alpha, bool replay = false) : base(source)
        {
            if (Format.NumericRepresentation != PixelNumericRepresentation.UnsignedInteger || Format.ChannelCount != bytesPerPixel || Format.BitsPerPixel != bytesPerPixel * 8)
                throw new NotSupportedException("Pixel format not supported.");

            if (over.Format != Format)
                throw new NotSupportedException("Sources must be same pixel format.");

            overSource = over;
            offsX = left;
            offsY = top;
            passthrough = replay;

            if (alpha)
                lineBuff = BufferPool.Rent(over.Width * bytesPerPixel, true);
        }

        /// <summary>
        /// Fills the requested area one row at a time: first with the underlying source (unless skipped in
        /// replay mode), then with whatever part of the overlay intersects that row.
        /// </summary>
        protected override unsafe void CopyPixelsInternal(in PixelArea prc, int cbStride, int cbBufferSize, IntPtr pbBuffer)
        {
            var inner = new PixelArea(offsX, offsY, overSource.Width, overSource.Height);

            // tx: first overlay column needed (overlay coordinates).
            // tw: number of overlay columns intersecting the request; may be <= 0 when there is no horizontal overlap.
            // cx: column within the output row at which the overlay starts.
            int tx = Math.Max(prc.X - inner.X, 0);
            int tw = Math.Min(prc.Width, Math.Min(Math.Max(prc.X + prc.Width - inner.X, 0), inner.Width - tx));
            int cx = Math.Max(inner.X - prc.X, 0);
            byte* pb = (byte*)pbBuffer;

            for (int y = 0; y < prc.Height; y++)
            {
                int cy = prc.Y + y;
                bool rowHasOverlay = tw > 0 && cy >= inner.Y && cy < inner.Y + inner.Height;

                // The underlying row is always read, except in replay mode when the overlay spans the
                // whole requested row (tw == prc.Width implies tw > 0 for any non-empty request).
                if (!passthrough || tw < prc.Width || cy < inner.Y || cy >= inner.Y + inner.Height)
                {
                    Profiler.PauseTiming();
                    Source.CopyPixels(new PixelArea(prc.X, cy, prc.Width, 1), cbStride, cbBufferSize, (IntPtr)pb);
                    Profiler.ResumeTiming();
                }

                if (rowHasOverlay)
                {
                    var area = new PixelArea(tx, cy - inner.Y, tw, 1);
                    var ptr = (IntPtr)(pb + cx * bytesPerPixel);

                    if (lineBuff.Array is null)
                        copyPixelsDirect(area, cbStride, cbBufferSize, ptr);
                    else
                        copyPixelsBuffered(area, ptr);
                }

                pb += cbStride;
            }
        }

        /// <summary>Copies an overlay row straight into the output buffer, replacing every pixel it covers.</summary>
        private void copyPixelsDirect(in PixelArea prc, int cbStride, int cbBufferSize, IntPtr pbBuffer)
        {
            Profiler.PauseTiming();
            overSource.CopyPixels(prc, cbStride, cbBufferSize, pbBuffer);
            Profiler.ResumeTiming();
        }

        /// <summary>
        /// Reads an overlay row into the scratch buffer and writes only its non-transparent pixels
        /// (top byte != 0) to the output buffer.
        /// </summary>
        /// <remarks>
        /// The vectorised paths build an all-or-nothing per-pixel mask from the alpha byte, so they select
        /// exactly the same pixels as the scalar tail loop regardless of the alpha value.
        /// </remarks>
        private unsafe void copyPixelsBuffered(in PixelArea prc, IntPtr pbBuffer)
        {
            fixed (byte* buff = &lineBuff.Array![lineBuff.Offset])
            {
                Profiler.PauseTiming();
                overSource.CopyPixels(prc, lineBuff.Count, lineBuff.Count, (IntPtr)buff);
                Profiler.ResumeTiming();

                uint* ip = (uint*)buff, ipe = ip + prc.Width;
                uint* op = (uint*)pbBuffer;

#if HWINTRINSICS
                // Byte-shuffle control that replicates byte 3 of each 32-bit pixel (the byte the scalar
                // loop tests with `i >> 24`) into all four bytes of that pixel.
                // NOTE(review): taking a raw pointer to this span below assumes the compiler emits the
                // constant array as static data rather than a movable heap array -- confirm for the target compiler.
                var shuffleMaskAlpha = (ReadOnlySpan<byte>)(new byte[] { 3, 3, 3, 3, 7, 7, 7, 7, 11, 11, 11, 11, 15, 15, 15, 15 });

                if (Avx2.IsSupported && prc.Width >= Vector256<uint>.Count)
                {
                    var vshufa = Avx2.BroadcastVector128ToVector256((byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(shuffleMaskAlpha)));

                    // Pull the end pointer back by one vector so `ip <= ipe` means "a full vector remains".
                    ipe -= Vector256<uint>.Count;
                    do
                    {
                        var vi = Avx.LoadVector256(ip);
                        ip += Vector256<uint>.Count;

                        // vt lanes are all-ones where the overlay pixel's alpha is zero (transparent).
                        var va = Avx2.Shuffle(vi.AsByte(), vshufa).AsUInt32();
                        var vt = Avx2.CompareEqual(va, Vector256<uint>.Zero);

                        // Keep the destination pixel where transparent, take the overlay pixel elsewhere.
                        var vo = Avx2.Or(Avx2.AndNot(vt, vi), Avx2.And(vt, Avx.LoadVector256(op)));

                        Avx.Store(op, vo);
                        op += Vector256<uint>.Count;

                    } while (ip <= ipe);
                    ipe += Vector256<uint>.Count;
                }
                else if (Ssse3.IsSupported && prc.Width >= Vector128<uint>.Count)
                {
                    var vshufa = Sse2.LoadVector128((byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(shuffleMaskAlpha)));

                    // Same scheme as the AVX2 path, four pixels at a time.
                    ipe -= Vector128<uint>.Count;
                    do
                    {
                        var vi = Sse2.LoadVector128(ip);
                        ip += Vector128<uint>.Count;

                        var va = Ssse3.Shuffle(vi.AsByte(), vshufa).AsUInt32();
                        var vt = Sse2.CompareEqual(va, Vector128<uint>.Zero);
                        var vo = Sse2.Or(Sse2.AndNot(vt, vi), Sse2.And(vt, Sse2.LoadVector128(op)));

                        Sse2.Store(op, vo);
                        op += Vector128<uint>.Count;

                    } while (ip <= ipe);
                    ipe += Vector128<uint>.Count;
                }
#endif

                // Scalar path: handles the whole row without intrinsics, or the leftover pixels after a vector loop.
                while (ip < ipe)
                {
                    uint i = *ip++;
                    if (i >> 24 != 0)
                        *op = i;

                    op++;
                }
            }
        }

        /// <summary>Returns the scratch row buffer, if one was rented, to the pool. Safe to call more than once.</summary>
        public void Dispose()
        {
            // No buffer is rented when alpha handling is off, and a previous Dispose clears the field;
            // in both cases there is nothing to hand back to the pool.
            if (lineBuff.Array is null)
                return;

            BufferPool.Return(lineBuff);
            lineBuff = default;
        }
    }
}
0 commit comments