Skip to content

Commit

Permalink
[AIE2] Implement vshuffle selection
Browse files Browse the repository at this point in the history
  • Loading branch information
ValentijnvdBeek committed Aug 15, 2024
1 parent d1bd94a commit aec1600
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 32 deletions.
12 changes: 12 additions & 0 deletions llvm/lib/Target/AIE/AIE2InstrPatterns.td
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,18 @@ def : Pat<(int_aie2_vshuffle VEC512:$s1, VEC512:$s2, eR:$mod),
def : Pat<(int_aie2_vshuffle_bf16 VEC512:$s1, VEC512:$s2, eR:$mod),
(VSHUFFLE VEC512:$s1, VEC512:$s2, eR:$mod)>;

// VSHUFFLE generic opcodes translation
// vshuffle_node is an SDNode standing in for the generic G_AIE_VSHUFFLE
// opcode: one result and three operands, where operands 1 and 2 are
// constrained to vectors and operand 3 to an integer (the shuffle mode).
// GINodeEquiv ties the node to G_AIE_VSHUFFLE so that the patterns below are
// usable for the generic opcode.
def vshuffle_node : SDNode<"AIE2::G_AIE_VSHUFFLE",
SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>]>>;
def : GINodeEquiv<G_AIE_VSHUFFLE, vshuffle_node>;

// One pattern per element layout (v16i32, v32i16, v64i8). All three map to
// the same VSHUFFLE instruction with the operands forwarded unchanged; only
// the vector type being matched differs.
def : Pat<(v16i32 (vshuffle_node (v16i32 VEC512:$v0), (v16i32 VEC512:$v1), (i32 eR:$mode))),
(VSHUFFLE VEC512:$v0, VEC512:$v1, i32:$mode)>;
def : Pat<(v32i16 (vshuffle_node (v32i16 VEC512:$v0), (v32i16 VEC512:$v1), (i32 eR:$mode))),
(VSHUFFLE VEC512:$v0, VEC512:$v1, i32:$mode)>;
def : Pat<(v64i8 (vshuffle_node (v64i8 VEC512:$v0), (v64i8 VEC512:$v1), (i32 eR:$mode))),
(VSHUFFLE VEC512:$v0, VEC512:$v1, i32:$mode)>;

// VSHIFT Intrinsic (shift/shiftx/shift_bytes)
def : Pat<(int_aie2_vshift_I512_I512 VEC512:$s1, VEC512:$s2, 0x0, eR:$shift),
(VSHIFT VEC512:$s1, VEC512:$s2, eR:$shift)>;
Expand Down
70 changes: 42 additions & 28 deletions llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,39 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic(
return false;
}

/// Rewrite the shuffle \p MI into a G_AIE_VSHUFFLE that uses shuffle mode
/// \p Mode.
///
/// \param MI       Instruction to replace. Operand 0 is read as the
///                 destination register and operands 1 and 2 as the two
///                 source registers.
/// \param TargetTy Type the destination must have for the rewrite to apply.
/// \param Mode     Value materialized as the s32 mode operand of the new
///                 G_AIE_VSHUFFLE.
/// \returns true if \p MI was replaced and erased. Returns false, without
///          emitting anything and leaving \p MI untouched, when the
///          destination is not of type \p TargetTy or when the sources are
///          neither of type \p TargetTy nor exactly half of it.
bool createVShuffle(MachineInstr &MI, const LLT TargetTy, const uint8_t Mode) {
  MachineIRBuilder MIB(MI);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  const Register DstReg = MI.getOperand(0).getReg();

  if (MRI.getType(DstReg) != TargetTy)
    return false;

  const Register Src1 = MI.getOperand(1).getReg();
  const Register Src2 = MI.getOperand(2).getReg();
  const LLT SrcTy = MRI.getType(Src1);

  // Only two source shapes can be lowered: sources that already have the
  // target type, or sources that are exactly half of it, so that
  // concatenating both yields one value of the target type. Anything else
  // would produce an ill-typed G_CONCAT_VECTORS below, so bail out before any
  // register or instruction is created.
  const bool SrcIsFullWidth = SrcTy == TargetTy;
  const bool SrcIsHalfWidth =
      TargetTy.isVector() && SrcTy.isVector() &&
      SrcTy.getElementType() == TargetTy.getElementType() &&
      SrcTy.getNumElements() * 2 == TargetTy.getNumElements();
  if (!SrcIsFullWidth && !SrcIsHalfWidth)
    return false;

  // The mode constant is emitted first so that it precedes the optional
  // concatenation in the instruction stream.
  const Register ShuffleModeReg =
      MRI.createGenericVirtualRegister(LLT::scalar(32));
  MIB.buildConstant(ShuffleModeReg, Mode);

  if (SrcIsFullWidth) {
    MIB.buildInstr(AIE2::G_AIE_VSHUFFLE, {DstReg},
                   {Src1, Src2, ShuffleModeReg});
  } else {
    // This combine only cares about the lower bits, so two separate
    // half-width vectors can be packed into a single full-width register.
    // That register is passed as both operands because the high part of the
    // shuffle input is ignored.
    const Register ConcatReg = MRI.createGenericVirtualRegister(TargetTy);
    MIB.buildConcatVectors(ConcatReg, {Src1, Src2});
    MIB.buildInstr(AIE2::G_AIE_VSHUFFLE, {DstReg},
                   {ConcatReg, ConcatReg, ShuffleModeReg});
  }

  MI.eraseFromParent();
  return true;
}

CombinerHelper::GeneratorType sectionGenerator(const int32_t From,
const int32_t To,
const int32_t Partitions,
Expand Down Expand Up @@ -192,34 +225,15 @@ bool AIE2PreLegalizerCombinerImpl::tryCombineShuffleVector(
const LLT V64S8 = LLT::fixed_vector(64, 8);
CombinerHelper::GeneratorType FourPartitions =
sectionGenerator(0, DstNumElts, 4, 1);
if (Helper.matchCombineShuffleVector(MI, FourPartitions, DstNumElts)) {
if (DstTy != V64S8)
return false;

const Register Src1 = MI.getOperand(1).getReg();
const Register Src2 = MI.getOperand(2).getReg();
const Register ShuffleModeReg =
MRI.createGenericVirtualRegister(LLT::scalar(32));

// This combiner only cares about the lower bits, so we can pad the
// vector to cover the case where two separate vectors are shuffled.
// together
MIB.buildConstant(ShuffleModeReg, 35);

if (SrcTy == V64S8) {
MIB.buildInstr(AIE2::G_AIE_VSHUFFLE, {DstReg},
{Src1, Src2, ShuffleModeReg});
} else {
// We reuse the same register since we ignore the high part of the vector
const Register TmpRegister = MRI.createGenericVirtualRegister(V64S8);
MIB.buildConcatVectors(TmpRegister, {Src1, Src2});
MIB.buildInstr(AIE2::G_AIE_VSHUFFLE, {DstReg},
{TmpRegister, TmpRegister, ShuffleModeReg});
}

MI.eraseFromParent();
return true;
}
if (Helper.matchCombineShuffleVector(MI, FourPartitions, DstNumElts))
return createVShuffle(MI, V64S8, 35);

const LLT V32S16 = LLT::fixed_vector(32, 16);
CombinerHelper::GeneratorType FourPartitionByTwo =
sectionGenerator(0, DstNumElts, 4, 2);
if (Helper.matchCombineShuffleVector(MI, FourPartitionByTwo, DstNumElts))
return createVShuffle(MI, V32S16, 29);

return false;
}

Expand Down
83 changes: 83 additions & 0 deletions llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-aie-vshuffle.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
#
# RUN: llc -mtriple aie2 -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s

---
# Instruction selection of G_AIE_VSHUFFLE on <16 x s32>: the CHECK lines expect
# a VSHUFFLE on vec512 registers with the mode constant materialized into an
# `er` register through MOV_RLC_imm10_pseudo.
# NOTE(review): the function name says "m35" but the mode constant used is 29;
# confirm which of the two is intended.
name: vshuffle_32_m35
legalized: true
regBankSelected: true
tracksRegLiveness: true
stack:
- { id: 0, name: "", size: 128, alignment: 32 }
body: |
bb.0.entry:
liveins: $x2
; CHECK-LABEL: name: vshuffle_32_m35
; CHECK: liveins: $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x2
; CHECK-NEXT: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 29
; CHECK-NEXT: [[VSHUFFLE:%[0-9]+]]:vec512 = VSHUFFLE [[COPY]], [[COPY]], [[MOV_RLC_imm10_pseudo]]
; CHECK-NEXT: $x0 = COPY [[VSHUFFLE]]
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
%1:vregbank(<16 x s32>) = COPY $x2
%2:gprregbank(s32) = G_CONSTANT i32 29
%0:vregbank(<16 x s32>) = G_AIE_VSHUFFLE %1:vregbank, %1:vregbank, %2:gprregbank(s32)
$x0 = COPY %0:vregbank(<16 x s32>)
PseudoRET implicit $lr, implicit $x0
...

---
# Instruction selection of G_AIE_VSHUFFLE on <32 x s16>: the CHECK lines expect
# a VSHUFFLE on vec512 registers with the mode constant materialized into an
# `er` register through MOV_RLC_imm10_pseudo.
# NOTE(review): the function name says "m35" but the mode constant used is 29;
# confirm which of the two is intended.
name: vshuffle_16_m35
legalized: true
regBankSelected: true
tracksRegLiveness: true
stack:
- { id: 0, name: "", size: 128, alignment: 32 }
body: |
bb.0.entry:
liveins: $x2
; CHECK-LABEL: name: vshuffle_16_m35
; CHECK: liveins: $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x2
; CHECK-NEXT: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 29
; CHECK-NEXT: [[VSHUFFLE:%[0-9]+]]:vec512 = VSHUFFLE [[COPY]], [[COPY]], [[MOV_RLC_imm10_pseudo]]
; CHECK-NEXT: $x0 = COPY [[VSHUFFLE]]
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
%1:vregbank(<32 x s16>) = COPY $x2
%2:gprregbank(s32) = G_CONSTANT i32 29
%0:vregbank(<32 x s16>) = G_AIE_VSHUFFLE %1:vregbank, %1:vregbank, %2:gprregbank(s32)
$x0 = COPY %0:vregbank(<32 x s16>)
PseudoRET implicit $lr, implicit $x0
...

---
# Instruction selection of G_AIE_VSHUFFLE on <64 x s8>: the CHECK lines expect
# a VSHUFFLE on vec512 registers with the mode constant materialized into an
# `er` register through MOV_RLC_imm10_pseudo.
# NOTE(review): the function name says "m35" but the mode constant used is 29;
# confirm which of the two is intended.
name: vshuffle_8_m35
legalized: true
regBankSelected: true
tracksRegLiveness: true
stack:
- { id: 0, name: "", size: 128, alignment: 32 }
body: |
bb.0.entry:
liveins: $x2
; CHECK-LABEL: name: vshuffle_8_m35
; CHECK: liveins: $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x2
; CHECK-NEXT: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 29
; CHECK-NEXT: [[VSHUFFLE:%[0-9]+]]:vec512 = VSHUFFLE [[COPY]], [[COPY]], [[MOV_RLC_imm10_pseudo]]
; CHECK-NEXT: $x0 = COPY [[VSHUFFLE]]
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
%1:vregbank(<64 x s8>) = COPY $x2
%2:gprregbank(s32) = G_CONSTANT i32 29
%0:vregbank(<64 x s8>) = G_AIE_VSHUFFLE %1:vregbank, %1:vregbank, %2:gprregbank(s32)
$x0 = COPY %0:vregbank(<64 x s8>)
PseudoRET implicit $lr, implicit $x0
Original file line number Diff line number Diff line change
Expand Up @@ -853,12 +853,12 @@ body: |
...

---
name: shuffle_vector_32
name: shuffle_vector_32_4x4
legalized: false
body: |
bb.1.entry:
liveins: $x0, $x1
; CHECK-LABEL: name: shuffle_vector_32
; CHECK-LABEL: name: shuffle_vector_32_4x4
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
Expand All @@ -874,12 +874,12 @@ body: |
...

---
name: shuffle_vector_16
name: shuffle_vector_16_4x4
legalized: false
body: |
bb.1.entry:
liveins: $x0, $x1
; CHECK-LABEL: name: shuffle_vector_16
; CHECK-LABEL: name: shuffle_vector_16_4x4
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0
Expand Down Expand Up @@ -939,3 +939,48 @@ body: |
$x2 = COPY %0:_(<64 x s8>)
PseudoRET implicit $lr, implicit $x2
...

---
# G_SHUFFLE_VECTOR producing <32 x s16> from two <32 x s16> sources. The CHECK
# lines expect it to be replaced by a G_AIE_VSHUFFLE with mode constant 29 that
# takes both source registers directly, with no concatenation.
name: shuffle_vector_1024_4x8
legalized: false
body: |
bb.1.entry:
liveins: $x0, $x1
; CHECK-LABEL: name: shuffle_vector_1024_4x8
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
; CHECK-NEXT: [[AIE_VSHUFFLE:%[0-9]+]]:_(<32 x s16>) = G_AIE_VSHUFFLE [[COPY]], [[COPY1]], [[C]](s32)
; CHECK-NEXT: $x2 = COPY [[AIE_VSHUFFLE]](<32 x s16>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x2
%1:_(<32 x s16>) = COPY $x0
%2:_(<32 x s16>) = COPY $x1
%0:_(<32 x s16>) = G_SHUFFLE_VECTOR %1:_(<32 x s16>), %2:_, shufflemask(0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27, 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14, 15, 22, 23, 30, 31)
$x2 = COPY %0:_(<32 x s16>)
PseudoRET implicit $lr, implicit $x2
...

---
# G_SHUFFLE_VECTOR producing <32 x s16> from two <16 x s16> sources. The CHECK
# lines expect the two sources to be joined by G_CONCAT_VECTORS into one
# <32 x s16> value, which is then passed as both vector operands of a
# G_AIE_VSHUFFLE with mode constant 29.
name: shuffle_vector_512_4x8
legalized: false
body: |
bb.1.entry:
liveins: $wl0, $wl1
; CHECK-LABEL: name: shuffle_vector_512_4x8
; CHECK: liveins: $wl0, $wl1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s16>) = COPY $wl0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s16>) = COPY $wl1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[COPY]](<16 x s16>), [[COPY1]](<16 x s16>)
; CHECK-NEXT: [[AIE_VSHUFFLE:%[0-9]+]]:_(<32 x s16>) = G_AIE_VSHUFFLE [[CONCAT_VECTORS]], [[CONCAT_VECTORS]], [[C]](s32)
; CHECK-NEXT: $x2 = COPY [[AIE_VSHUFFLE]](<32 x s16>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x2
%1:_(<16 x s16>) = COPY $wl0
%2:_(<16 x s16>) = COPY $wl1
%0:_(<32 x s16>) = G_SHUFFLE_VECTOR %1:_(<16 x s16>), %2:_, shufflemask(0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27, 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14, 15, 22, 23, 30, 31)
$x2 = COPY %0:_(<32 x s16>)
PseudoRET implicit $lr, implicit $x2
...

0 comments on commit aec1600

Please sign in to comment.