Skip to content

Commit

Permalink
[AIE2] Check all divisions for shufflevector extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
ValentijnvdBeek committed Aug 2, 2024
1 parent d1411d4 commit 83f6f9d
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 30 deletions.
48 changes: 18 additions & 30 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -524,36 +524,24 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
}

// {0, 1, ..., |DstVector| - 1} -> G_UNMERGE_VALUES
// Extracts the first chunk, of the same size as the destination vector, from
// the source
GeneratorType FirstQuarter = adderGenerator(0, DstNumElts - 1, 1);
if (matchCombineShuffleVector(MI, FirstQuarter, DstNumElts - 1)) {
// This optimization does not work if the target type is not a power of two;
// this can happen in some backends that support uneven vector types. We
// also need to make sure that the vector can be split into two.
if (SrcTy == DstTy || ((SrcNumElts / 2) % 2) != 0 ||
SrcNumElts % DstNumElts != 0)
return false;
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
const Register TargetReg = Mask[0] < (int)SrcNumElts ? SrcReg1 : SrcReg2;
createUnmergeValue(MI, TargetReg, DstReg, 0, 0, SrcNumElts);
MI.eraseFromParent();
return true;
}

// {|DstVector|, |DstVector|+1, ..., 2 * |DstVector| - 1} -> G_UNMERGE_VALUES
// Extracts the second chunk, of the same size as the destination vector, from
// the source
GeneratorType SecondQuarter =
adderGenerator(DstNumElts, (DstNumElts * 2) - 1, 1);
if (matchCombineShuffleVector(MI, SecondQuarter, DstNumElts - 1)) {
if (((SrcNumElts / 2) % 2) != 0 || SrcNumElts % DstNumElts != 0)
return false;
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
const Register TargetReg = Mask[0] < (int)SrcNumElts ? SrcReg1 : SrcReg2;
createUnmergeValue(MI, TargetReg, DstReg, 1, 0, SrcNumElts);
MI.eraseFromParent();
return true;
// Extracts a chunk of the same size as the destination vector from the
// source, trying each chunk position in turn
for (uint8_t Current = 0, Total = SrcNumElts; Current < Total; Current++) {
uint32_t Start = Current * DstNumElts, End = Start + DstNumElts - 1;
GeneratorType Generator = adderGenerator(Start, End, 1);
if (matchCombineShuffleVector(MI, Generator, DstNumElts - 1)) {
// This optimization does not work if the target type is not a power of
// two; this can happen in some backends that support uneven vector types.
// We also need to make sure that the vector can be split into two.
if (SrcTy == DstTy || ((SrcNumElts / 2) % 2) != 0 ||
SrcNumElts % DstNumElts != 0)
return false;
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
const Register TargetReg = Mask[0] < (int)SrcNumElts ? SrcReg1 : SrcReg2;
createUnmergeValue(MI, TargetReg, DstReg, Current, 0, SrcNumElts);
MI.eraseFromParent();
return true;
}
}

// After this point, it is assumed our shufflevectors work on vectors that can
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,86 @@ body: |
PseudoRET implicit $lr, implicit %0
...

---
# Shuffle that selects elements 8-15 of a <32 x s32> (1024-bit) source into a
# <8 x s32> (256-bit) result, i.e. the second 8-element chunk of the source.
# The checks expect the G_SHUFFLE_VECTOR to be replaced by two nested
# G_UNMERGE_VALUES: the source is split into two <16 x s32> halves, the first
# half ([[UV]]) is split again, and its second <8 x s32> result ([[UV3]]) is
# copied to $wl0.
# NOTE(review): the "_q1" suffix does not match the chunk position (the 0-7
# mask is tested under "_q4") - confirm the naming is intended.
name: extract_vector_256_1024_q1
legalized: false
body: |
bb.1.entry:
liveins: $y2
; CHECK-LABEL: name: extract_vector_256_1024_q1
; CHECK: liveins: $y2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV]](<16 x s32>)
; CHECK-NEXT: $wl0 = COPY [[UV3]](<8 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0
%1:_(<32 x s32>) = COPY $y2
%0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(8, 9, 10, 11, 12, 13, 14, 15)
$wl0 = COPY %0:_(<8 x s32>)
PseudoRET implicit $lr, implicit $wl0
...

---
# Shuffle that selects elements 16-23 of a <32 x s32> (1024-bit) source into a
# <8 x s32> (256-bit) result, i.e. the third 8-element chunk of the source.
# The checks expect the G_SHUFFLE_VECTOR to be replaced by two nested
# G_UNMERGE_VALUES: the source is split into two <16 x s32> halves, the second
# half ([[UV1]]) is split again, and its first <8 x s32> result ([[UV2]]) is
# copied to $wl0.
name: extract_vector_256_1024_q2
legalized: false
body: |
bb.1.entry:
liveins: $y2
; CHECK-LABEL: name: extract_vector_256_1024_q2
; CHECK: liveins: $y2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV1]](<16 x s32>)
; CHECK-NEXT: $wl0 = COPY [[UV2]](<8 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0
%1:_(<32 x s32>) = COPY $y2
%0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(16, 17, 18, 19, 20, 21, 22, 23)
$wl0 = COPY %0:_(<8 x s32>)
PseudoRET implicit $lr, implicit $wl0
...

---
# Shuffle that selects elements 24-31 of a <32 x s32> (1024-bit) source into a
# <8 x s32> (256-bit) result, i.e. the fourth (last) 8-element chunk of the
# source. The checks expect the G_SHUFFLE_VECTOR to be replaced by two nested
# G_UNMERGE_VALUES: the source is split into two <16 x s32> halves, the second
# half ([[UV1]]) is split again, and its second <8 x s32> result ([[UV3]]) is
# copied to $wl0.
name: extract_vector_256_1024_q3
legalized: false
body: |
bb.1.entry:
liveins: $y2
; CHECK-LABEL: name: extract_vector_256_1024_q3
; CHECK: liveins: $y2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV1]](<16 x s32>)
; CHECK-NEXT: $wl0 = COPY [[UV3]](<8 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0
%1:_(<32 x s32>) = COPY $y2
%0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(24, 25, 26, 27, 28, 29, 30, 31)
$wl0 = COPY %0:_(<8 x s32>)
PseudoRET implicit $lr, implicit $wl0
...

---
# Shuffle that selects elements 0-7 of a <32 x s32> (1024-bit) source into a
# <8 x s32> (256-bit) result, i.e. the first 8-element chunk of the source.
# The checks expect the G_SHUFFLE_VECTOR to be replaced by two nested
# G_UNMERGE_VALUES: the source is split into two <16 x s32> halves, the first
# half ([[UV]]) is split again, and its first <8 x s32> result ([[UV2]]) is
# copied to $wl0.
# NOTE(review): despite the "_q4" suffix this mask addresses the first chunk -
# confirm the naming is intended.
name: extract_vector_256_1024_q4
legalized: false
body: |
bb.1.entry:
liveins: $y2
; CHECK-LABEL: name: extract_vector_256_1024_q4
; CHECK: liveins: $y2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV]](<16 x s32>)
; CHECK-NEXT: $wl0 = COPY [[UV2]](<8 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0
%1:_(<32 x s32>) = COPY $y2
%0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7)
$wl0 = COPY %0:_(<8 x s32>)
PseudoRET implicit $lr, implicit $wl0
...

---
name: insert_vector_16_elements
legalized: false
Expand Down

0 comments on commit 83f6f9d

Please sign in to comment.