#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
return false;
}
+// Check if an EXT instruction can handle the shuffle mask when the vector
+// sources of the shuffle are the same.
+//
+// Returns true iff the mask selects a contiguous run of elements that starts
+// at M[0] and wraps around the end of a single vector — exactly the pattern a
+// one-register EXT (vector extract) produces. UNDEF entries (negative mask
+// indices) after the first are treated as wildcards.
+static bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
+ unsigned NumElts = Ty.getNumElements();
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ // The caller derives the EXT immediate from M[0], so it must be concrete.
+ if (M[0] < 0)
+ return false;
+
+ // If this is a VEXT shuffle, the immediate value is the index of the first
+ // element. The other shuffle indices must be the successive elements after
+ // the first one.
+ unsigned ExpectedElt = M[0];
+ for (unsigned I = 1; I < NumElts; ++I) {
+ // Increment the expected index. If it wraps around, just follow it
+ // back to index zero and keep going.
+ ++ExpectedElt;
+ if (ExpectedElt == NumElts)
+ ExpectedElt = 0;
+
+ if (M[I] < 0)
+ continue; // Ignore UNDEF indices.
+ if (ExpectedElt != static_cast<unsigned>(M[I]))
+ return false;
+ }
+
+ return true;
+}
+
+// Match a G_SHUFFLE_VECTOR whose mask can be implemented by a single AArch64
+// EXT (byte-wise extract across a register pair). Two patterns are handled:
+//  * the classic two-source EXT mask (via getExtMask), and
+//  * a single-source "rotation" mask where the second shuffle input is
+//    G_IMPLICIT_DEF, in which case EXT is emitted with V1 as both operands.
static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
ShuffleVectorPseudo &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
Register Dst = MI.getOperand(0).getReg();
- auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(),
- MRI.getType(Dst).getNumElements());
- if (!ExtInfo)
- return false;
- bool ReverseExt;
- uint64_t Imm;
- std::tie(ReverseExt, Imm) = *ExtInfo;
+ LLT DstTy = MRI.getType(Dst);
Register V1 = MI.getOperand(1).getReg();
Register V2 = MI.getOperand(2).getReg();
+ auto Mask = MI.getOperand(3).getShuffleMask();
+ uint64_t Imm;
+ auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
+ // EXT's immediate counts bytes; scale element indices by the element size.
+ // V1 and V2 share a type, so computing this before any swap is fine.
+ uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
+
+ if (!ExtInfo) {
+ // Fall back to the single-source form: only valid when the second input
+ // is undef and the mask is a contiguous (wrapping) run of V1's elements.
+ if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
+ !isSingletonExtMask(Mask, DstTy))
+ return false;
+
+ // isSingletonExtMask guarantees Mask[0] >= 0, so this cannot go negative.
+ Imm = Mask[0] * ExtFactor;
+ MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
+ return true;
+ }
+ bool ReverseExt;
+ std::tie(ReverseExt, Imm) = *ExtInfo;
if (ReverseExt)
std::swap(V1, V2);
- uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
Imm *= ExtFactor;
MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
return true;
$q0 = COPY %shuf(<8 x s16>)
RET_ReallyLR implicit $q0
...
+---
+name: v2s64_singleton_ext
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+ ; CHECK-LABEL: name: v2s64_singleton_ext
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %v1:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: %shuf:_(<2 x s64>) = G_EXT %v1, %v1, [[C]](s32)
+ ; CHECK-NEXT: $q0 = COPY %shuf(<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %v1:_(<2 x s64>) = COPY $q0
+ %v2:_(<2 x s64>) = G_IMPLICIT_DEF
+ %shuf:_(<2 x s64>) = G_SHUFFLE_VECTOR %v1(<2 x s64>), %v2, shufflemask(1, 0)
+ $q0 = COPY %shuf(<2 x s64>)
+ RET_ReallyLR implicit $q0
+...
+---
+name: v2s64_singleton_ext_all_undef
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+ ; CHECK-LABEL: name: v2s64_singleton_ext_all_undef
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %v1:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: %v2:_(<2 x s64>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: %shuf:_(<2 x s64>) = G_ZIP2 %v1, %v2
+ ; CHECK-NEXT: $q0 = COPY %shuf(<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %v1:_(<2 x s64>) = COPY $q0
+ %v2:_(<2 x s64>) = G_IMPLICIT_DEF
+ %shuf:_(<2 x s64>) = G_SHUFFLE_VECTOR %v1(<2 x s64>), %v2, shufflemask(undef, undef)
+ $q0 = COPY %shuf(<2 x s64>)
+ RET_ReallyLR implicit $q0
+...
+---
+name: v2s64_singleton_ext_same
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+ ; CHECK-LABEL: name: v2s64_singleton_ext_same
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %v1:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: %shuf:_(<2 x s64>) = G_DUPLANE64 %v1, [[C]](s64)
+ ; CHECK-NEXT: $q0 = COPY %shuf(<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %v1:_(<2 x s64>) = COPY $q0
+ %v2:_(<2 x s64>) = G_IMPLICIT_DEF
+ %shuf:_(<2 x s64>) = G_SHUFFLE_VECTOR %v1(<2 x s64>), %v2, shufflemask(1, 1)
+ $q0 = COPY %shuf(<2 x s64>)
+ RET_ReallyLR implicit $q0
+...
; CHECK-LABEL: name: uzp1_v4s32
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
- ; CHECK: $q0 = COPY [[UZP1_]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $q0 = COPY [[UZP1_]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 2, 4, 6)
; CHECK-LABEL: name: uzp2_v4s32
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[UZP2_]]
- ; CHECK: $q0 = COPY [[UZP2_]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $q0 = COPY [[UZP2_]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
- %1:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, 5, 7)
- $q0 = COPY %1(<4 x s32>)
+ %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, 5, 7)
+ $q0 = COPY %2(<4 x s32>)
RET_ReallyLR implicit $q0
...
; CHECK-LABEL: name: no_uzp1
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 1, 4, 6)
- ; CHECK: $q0 = COPY [[SHUF]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 1, 4, 6)
+ ; CHECK-NEXT: $q0 = COPY [[SHUF]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 1, 4, 6)
; CHECK-LABEL: name: no_uzp2
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(1, 4, 5, 7)
- ; CHECK: $q0 = COPY [[SHUF]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(1, 4, 5, 7)
+ ; CHECK-NEXT: $q0 = COPY [[SHUF]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 4, 5, 7)
; CHECK-LABEL: name: uzp1_undef
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
- ; CHECK: $q0 = COPY [[UZP1_]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $q0 = COPY [[UZP1_]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, -1, 4, 6)
; CHECK-LABEL: name: uzp2_undef
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[UZP2_]]
- ; CHECK: $q0 = COPY [[UZP2_]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $q0 = COPY [[UZP2_]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
- %1:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, -1, 7)
- $q0 = COPY %1(<4 x s32>)
+ %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, -1, 7)
+ $q0 = COPY %2(<4 x s32>)
RET_ReallyLR implicit $q0