Skip to content

Commit d46998b

Browse files
authored
[DAG] Add ISD::VECTOR_COMPRESS handling in computeKnownBits/ComputeNumSignBits (llvm#159692)
Resolves llvm#158332
1 parent 34dfbb0 commit d46998b

File tree

5 files changed

+413
-0
lines changed

5 files changed

+413
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3480,6 +3480,17 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
       break;
     }
     break;
+  case ISD::VECTOR_COMPRESS: {
+    SDValue Vec = Op.getOperand(0);
+    SDValue PassThru = Op.getOperand(2);
+    Known = computeKnownBits(PassThru, DemandedElts, Depth + 1);
+    // If we don't know any bits, early out.
+    if (Known.isUnknown())
+      break;
+    Known2 = computeKnownBits(Vec, Depth + 1);
+    Known = Known.intersectWith(Known2);
+    break;
+  }
   case ISD::VECTOR_SHUFFLE: {
     assert(!Op.getValueType().isScalableVector());
     // Collect the known bits that are shared by every vector element referenced
@@ -4789,6 +4800,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     }
     return Tmp;
 
+  case ISD::VECTOR_COMPRESS: {
+    SDValue Vec = Op.getOperand(0);
+    SDValue PassThru = Op.getOperand(2);
+    Tmp = ComputeNumSignBits(PassThru, DemandedElts, Depth + 1);
+    if (Tmp == 1)
+      return 1;
+    Tmp2 = ComputeNumSignBits(Vec, Depth + 1);
+    Tmp = std::min(Tmp, Tmp2);
+    return Tmp;
+  }
+
   case ISD::VECTOR_SHUFFLE: {
     // Collect the minimum number of sign bits that are shared by every vector
     // element referenced by the shuffle.

llvm/test/CodeGen/AArch64/sve-vector-compress.ll

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,42 @@ define <vscale x 4 x i4> @test_compress_illegal_element_type(<vscale x 4 x i4> %
   ret <vscale x 4 x i4> %out
 }
 
+define <vscale x 4 x i32> @test_compress_knownbits_zext(<vscale x 4 x i16> %vec, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru) nounwind {
+; CHECK-LABEL: test_compress_knownbits_zext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    cntp x8, p0, p0.s
+; CHECK-NEXT:    and z1.s, z1.s, #0x3
+; CHECK-NEXT:    compact z0.s, p0, z0.s
+; CHECK-NEXT:    whilelo p0.s, xzr, x8
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %xvec = zext <vscale x 4 x i16> %vec to <vscale x 4 x i32>
+  %xpassthru = and <vscale x 4 x i32> %passthru, splat (i32 3)
+  %out = call <vscale x 4 x i32> @llvm.experimental.vector.compress(<vscale x 4 x i32> %xvec, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %xpassthru)
+  %res = and <vscale x 4 x i32> %out, splat (i32 65535)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 4 x i32> @test_compress_numsignbits_sext(<vscale x 4 x i16> %vec, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru) nounwind {
+; CHECK-LABEL: test_compress_numsignbits_sext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    and z1.s, z1.s, #0x3
+; CHECK-NEXT:    cntp x8, p0, p0.s
+; CHECK-NEXT:    sxth z0.s, p1/m, z0.s
+; CHECK-NEXT:    compact z0.s, p0, z0.s
+; CHECK-NEXT:    whilelo p0.s, xzr, x8
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %xvec = sext <vscale x 4 x i16> %vec to <vscale x 4 x i32>
+  %xpassthru = and <vscale x 4 x i32> %passthru, splat (i32 3)
+  %out = call <vscale x 4 x i32> @llvm.experimental.vector.compress(<vscale x 4 x i32> %xvec, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %xpassthru)
+  %shl = shl <vscale x 4 x i32> %out, splat (i32 16)
+  %res = ashr <vscale x 4 x i32> %shl, splat (i32 16)
+  ret <vscale x 4 x i32> %res
+}
+
 define <vscale x 8 x i32> @test_compress_large(<vscale x 8 x i32> %vec, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_compress_large:
 ; CHECK:       // %bb.0:

llvm/test/CodeGen/AArch64/vector-compress.ll

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -471,3 +471,116 @@ define <3 x i3> @test_compress_narrow_illegal_element_type(<3 x i3> %vec, <3 x i
   %out = call <3 x i3> @llvm.experimental.vector.compress(<3 x i3> %vec, <3 x i1> %mask, <3 x i3> undef)
   ret <3 x i3> %out
 }
+
+define <4 x i32> @test_compress_knownbits_zext_v4i16_4i32(<4 x i16> %vec, <4 x i1> %mask, <4 x i32> %passthru) nounwind {
+; CHECK-LABEL: test_compress_knownbits_zext_v4i16_4i32:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    ushll.4s v1, v1, #0
+; CHECK-NEXT:    movi.4s v3, #1
+; CHECK-NEXT:    mov x14, sp
+; CHECK-NEXT:    movi.4s v4, #3
+; CHECK-NEXT:    ushll.4s v0, v0, #0
+; CHECK-NEXT:    mov x13, sp
+; CHECK-NEXT:    mov x12, sp
+; CHECK-NEXT:    mov x15, sp
+; CHECK-NEXT:    shl.4s v1, v1, #31
+; CHECK-NEXT:    and.16b v2, v2, v4
+; CHECK-NEXT:    cmlt.4s v1, v1, #0
+; CHECK-NEXT:    str q2, [sp]
+; CHECK-NEXT:    and.16b v3, v1, v3
+; CHECK-NEXT:    mov.s w8, v1[1]
+; CHECK-NEXT:    mov.s w9, v1[2]
+; CHECK-NEXT:    mov.s w10, v1[3]
+; CHECK-NEXT:    fmov w11, s1
+; CHECK-NEXT:    addv.4s s1, v3
+; CHECK-NEXT:    and x16, x11, #0x1
+; CHECK-NEXT:    and x8, x8, #0x1
+; CHECK-NEXT:    bfi x14, x11, #2, #1
+; CHECK-NEXT:    add x8, x16, x8
+; CHECK-NEXT:    and x9, x9, #0x1
+; CHECK-NEXT:    and x10, x10, #0x1
+; CHECK-NEXT:    fmov w11, s1
+; CHECK-NEXT:    add x9, x8, x9
+; CHECK-NEXT:    mov w16, #3 ; =0x3
+; CHECK-NEXT:    add x10, x9, x10
+; CHECK-NEXT:    orr x8, x12, x8, lsl #2
+; CHECK-NEXT:    bfi x15, x9, #2, #2
+; CHECK-NEXT:    cmp x10, #3
+; CHECK-NEXT:    bfi x13, x11, #2, #2
+; CHECK-NEXT:    mov.s w11, v0[3]
+; CHECK-NEXT:    csel x9, x10, x16, lo
+; CHECK-NEXT:    ldr w13, [x13]
+; CHECK-NEXT:    str s0, [sp]
+; CHECK-NEXT:    st1.s { v0 }[1], [x14]
+; CHECK-NEXT:    st1.s { v0 }[2], [x8]
+; CHECK-NEXT:    orr x8, x12, x9, lsl #2
+; CHECK-NEXT:    csel w9, w11, w13, hi
+; CHECK-NEXT:    st1.s { v0 }[3], [x15]
+; CHECK-NEXT:    str w9, [x8]
+; CHECK-NEXT:    ldr q0, [sp], #16
+; CHECK-NEXT:    ret
+entry:
+  %xvec = zext <4 x i16> %vec to <4 x i32>
+  %xpassthru = and <4 x i32> %passthru, splat (i32 3)
+  %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %xvec, <4 x i1> %mask, <4 x i32> %xpassthru)
+  %res = and <4 x i32> %out, splat (i32 65535)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_compress_numsignbits_sext_v4i16_4i32(<4 x i16> %vec, <4 x i1> %mask, <4 x i32> %passthru) nounwind {
+; CHECK-LABEL: test_compress_numsignbits_sext_v4i16_4i32:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    ushll.4s v1, v1, #0
+; CHECK-NEXT:    movi.4s v3, #1
+; CHECK-NEXT:    mov x14, sp
+; CHECK-NEXT:    movi.4s v4, #3
+; CHECK-NEXT:    sshll.4s v0, v0, #0
+; CHECK-NEXT:    mov x13, sp
+; CHECK-NEXT:    mov x12, sp
+; CHECK-NEXT:    mov x15, sp
+; CHECK-NEXT:    shl.4s v1, v1, #31
+; CHECK-NEXT:    and.16b v2, v2, v4
+; CHECK-NEXT:    cmlt.4s v1, v1, #0
+; CHECK-NEXT:    str q2, [sp]
+; CHECK-NEXT:    and.16b v3, v1, v3
+; CHECK-NEXT:    mov.s w8, v1[1]
+; CHECK-NEXT:    mov.s w9, v1[2]
+; CHECK-NEXT:    mov.s w10, v1[3]
+; CHECK-NEXT:    fmov w11, s1
+; CHECK-NEXT:    addv.4s s1, v3
+; CHECK-NEXT:    and x16, x11, #0x1
+; CHECK-NEXT:    and x8, x8, #0x1
+; CHECK-NEXT:    bfi x14, x11, #2, #1
+; CHECK-NEXT:    add x8, x16, x8
+; CHECK-NEXT:    and x9, x9, #0x1
+; CHECK-NEXT:    and x10, x10, #0x1
+; CHECK-NEXT:    fmov w11, s1
+; CHECK-NEXT:    add x9, x8, x9
+; CHECK-NEXT:    mov w16, #3 ; =0x3
+; CHECK-NEXT:    add x10, x9, x10
+; CHECK-NEXT:    orr x8, x12, x8, lsl #2
+; CHECK-NEXT:    bfi x15, x9, #2, #2
+; CHECK-NEXT:    cmp x10, #3
+; CHECK-NEXT:    bfi x13, x11, #2, #2
+; CHECK-NEXT:    mov.s w11, v0[3]
+; CHECK-NEXT:    csel x9, x10, x16, lo
+; CHECK-NEXT:    ldr w13, [x13]
+; CHECK-NEXT:    str s0, [sp]
+; CHECK-NEXT:    st1.s { v0 }[1], [x14]
+; CHECK-NEXT:    st1.s { v0 }[2], [x8]
+; CHECK-NEXT:    orr x8, x12, x9, lsl #2
+; CHECK-NEXT:    csel w9, w11, w13, hi
+; CHECK-NEXT:    st1.s { v0 }[3], [x15]
+; CHECK-NEXT:    str w9, [x8]
+; CHECK-NEXT:    ldr q0, [sp], #16
+; CHECK-NEXT:    ret
+entry:
+  %xvec = sext <4 x i16> %vec to <4 x i32>
+  %xpassthru = and <4 x i32> %passthru, splat(i32 3)
+  %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %xvec, <4 x i1> %mask, <4 x i32> %xpassthru)
+  %shl = shl <4 x i32> %out, splat(i32 16)
+  %res = ashr <4 x i32> %shl, splat(i32 16)
+  ret <4 x i32> %res
+}

llvm/test/CodeGen/RISCV/rvv/vector-compress.ll

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,39 @@ define <vscale x 4 x i32> @vector_compress_nxv4i32_passthru(<vscale x 4 x i32> %
   ret <vscale x 4 x i32> %ret
 }
 
+define <vscale x 4 x i32> @test_compress_nvx8f64_knownbits(<vscale x 4 x i16> %vec, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru) nounwind {
+; CHECK-LABEL: test_compress_nvx8f64_knownbits:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    vand.vi v8, v10, 3
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v12, v0
+; CHECK-NEXT:    ret
+  %xvec = zext <vscale x 4 x i16> %vec to <vscale x 4 x i32>
+  %xpassthru = and <vscale x 4 x i32> %passthru, splat (i32 3)
+  %out = call <vscale x 4 x i32> @llvm.experimental.vector.compress(<vscale x 4 x i32> %xvec, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %xpassthru)
+  %res = and <vscale x 4 x i32> %out, splat (i32 65535)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 4 x i32> @test_compress_nv8xf64_numsignbits(<vscale x 4 x i16> %vec, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru) nounwind {
+; CHECK-LABEL: test_compress_nv8xf64_numsignbits:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vand.vi v8, v10, 3
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v12, v0
+; CHECK-NEXT:    ret
+  %xvec = sext <vscale x 4 x i16> %vec to <vscale x 4 x i32>
+  %xpassthru = and <vscale x 4 x i32> %passthru, splat (i32 3)
+  %out = call <vscale x 4 x i32> @llvm.experimental.vector.compress(<vscale x 4 x i32> %xvec, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %xpassthru)
+  %shl = shl <vscale x 4 x i32> %out, splat (i32 16)
+  %res = ashr <vscale x 4 x i32> %shl, splat (i32 16)
+  ret <vscale x 4 x i32> %res
+}
+
 define <vscale x 8 x i32> @vector_compress_nxv8i32(<vscale x 8 x i32> %data, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: vector_compress_nxv8i32:
 ; CHECK:       # %bb.0:

0 commit comments

Comments
 (0)