unknown at compile-time due to the definition of shufflevector in
LLVM IR.
+vshf.[bhwd]:
+ When the shuffle description describes a splat operation, splat.[bhwd]
+ instructions will be selected instead of vshf.[bhwd]. Unlike the ilv*
+ and pck* instructions, this is matched from MipsISD::VSHF instead of
+ a special-case MipsISD node.
+
ilvl.d, pckev.d:
It is not possible to emit ilvl.d, or pckev.d since ilvev.d covers the
same shuffle. ilvev.d will be emitted instead.
It is not possible to emit ilvr.d, or pckod.d since ilvod.d covers the
same shuffle. ilvod.d will be emitted instead.
+splat.[bhwd]:
+ The intrinsic will work as expected. However, unlike other intrinsics,
+ it lowers directly to MipsISD::VSHF instead of using common IR.
+
splati.w:
It is not possible to emit splati.w since shf.w covers the same cases.
shf.w will be emitted instead.
def vsplatf64 : PatFrag<(ops node:$e0),
(v2f64 (build_vector node:$e0, node:$e0))>;
+// Pattern fragments used to select splat.[bhwd]. Each one matches a MipsVSHF
+// node whose first operand is a splat of $i (matched through the vsplati8/16/
+// 32/64 fragment for the element width) and whose remaining two operands are
+// both the same vector $v.
+def vsplati8_elt : PatFrag<(ops node:$v, node:$i),
+ (MipsVSHF (vsplati8 node:$i), node:$v, node:$v)>;
+def vsplati16_elt : PatFrag<(ops node:$v, node:$i),
+ (MipsVSHF (vsplati16 node:$i), node:$v, node:$v)>;
+def vsplati32_elt : PatFrag<(ops node:$v, node:$i),
+ (MipsVSHF (vsplati32 node:$i), node:$v, node:$v)>;
+def vsplati64_elt : PatFrag<(ops node:$v, node:$i),
+ (MipsVSHF (vsplati64 node:$i), node:$v, node:$v)>;
+
class SplatPatLeaf<Operand opclass, dag frag, code pred = [{}],
SDNodeXForm xform = NOOP_SDNodeXForm>
: PatLeaf<frag, pred, xform> {
InstrItinClass Itinerary = itin;
}
+// Description base for the 3R-format splat.[bhwd] instructions.
+// instr_asm - mnemonic placed at the start of the assembly string.
+// OpNode - pattern operator applied to ($ws, $rt) to form the selection
+// pattern (the SPLAT_*_DESC classes pass a vsplati*_elt fragment).
+// ROWD - register operand class of the destination $wd.
+// ROWS - register operand class of the source $ws; defaults to ROWD.
+// itin - instruction itinerary; defaults to NoItinerary.
+// The element index $rt is always a GPR32 and is printed as "$ws[$rt]".
+class MSA_3R_SPLAT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, GPR32:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$rt]");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, GPR32:$rt))];
+ InstrItinClass Itinerary = itin;
+}
+
class MSA_3R_VSHF_DESC_BASE<string instr_asm, RegisterOperand ROWD,
RegisterOperand ROWS = ROWD,
RegisterOperand ROWT = ROWD,
InstrItinClass Itinerary = itin;
}
-class MSA_3R_INDEX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- RegisterOperand ROWD, RegisterOperand ROWS,
- RegisterOperand RORT,
- InstrItinClass itin = NoItinerary> {
+// Description base for the 3R-format sld.[bhwd] instructions.
+// instr_asm - mnemonic placed at the start of the assembly string.
+// OpNode - pattern operator applied to ($ws, $rt) to form the selection
+// pattern (the SLD_*_DESC classes pass the int_mips_sld_* intrinsic).
+// ROWD - register operand class of the destination $wd.
+// ROWS - register operand class of the source $ws; defaults to ROWD.
+// itin - instruction itinerary; defaults to NoItinerary.
+// The index $rt is always a GPR32 and is printed as "$ws[$rt]".
+class MSA_3R_SLD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
dag OutOperandList = (outs ROWD:$wd);
- dag InOperandList = (ins ROWS:$ws, RORT:$rt);
+ dag InOperandList = (ins ROWS:$ws, GPR32:$rt);
string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$rt]");
- list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, RORT:$rt))];
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, GPR32:$rt))];
InstrItinClass Itinerary = itin;
}
class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128HOpnd>;
class SHF_W_DESC : MSA_I8_SHF_DESC_BASE<"shf.w", MSA128WOpnd>;
-class SLD_B_DESC : MSA_3R_INDEX_DESC_BASE<"sld.b", int_mips_sld_b, MSA128BOpnd,
- MSA128BOpnd, GPR32Opnd>;
-class SLD_H_DESC : MSA_3R_INDEX_DESC_BASE<"sld.h", int_mips_sld_h, MSA128HOpnd,
- MSA128HOpnd, GPR32Opnd>;
-class SLD_W_DESC : MSA_3R_INDEX_DESC_BASE<"sld.w", int_mips_sld_w, MSA128WOpnd,
- MSA128WOpnd, GPR32Opnd>;
-class SLD_D_DESC : MSA_3R_INDEX_DESC_BASE<"sld.d", int_mips_sld_d, MSA128DOpnd,
- MSA128DOpnd, GPR32Opnd>;
+class SLD_B_DESC : MSA_3R_SLD_DESC_BASE<"sld.b", int_mips_sld_b, MSA128BOpnd>;
+class SLD_H_DESC : MSA_3R_SLD_DESC_BASE<"sld.h", int_mips_sld_h, MSA128HOpnd>;
+class SLD_W_DESC : MSA_3R_SLD_DESC_BASE<"sld.w", int_mips_sld_w, MSA128WOpnd>;
+class SLD_D_DESC : MSA_3R_SLD_DESC_BASE<"sld.d", int_mips_sld_d, MSA128DOpnd>;
class SLDI_B_DESC : MSA_ELM_DESC_BASE<"sldi.b", int_mips_sldi_b, MSA128BOpnd>;
class SLDI_H_DESC : MSA_ELM_DESC_BASE<"sldi.h", int_mips_sldi_h, MSA128HOpnd>;
class SLLI_D_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.d", shl, vsplati64_uimm6,
MSA128DOpnd>;
-class SPLAT_B_DESC : MSA_3R_INDEX_DESC_BASE<"splat.b", int_mips_splat_b,
- MSA128BOpnd, MSA128BOpnd,
- GPR32Opnd>;
-class SPLAT_H_DESC : MSA_3R_INDEX_DESC_BASE<"splat.h", int_mips_splat_h,
- MSA128HOpnd, MSA128HOpnd,
- GPR32Opnd>;
-class SPLAT_W_DESC : MSA_3R_INDEX_DESC_BASE<"splat.w", int_mips_splat_w,
- MSA128WOpnd, MSA128WOpnd,
- GPR32Opnd>;
-class SPLAT_D_DESC : MSA_3R_INDEX_DESC_BASE<"splat.d", int_mips_splat_d,
- MSA128DOpnd, MSA128DOpnd,
- GPR32Opnd>;
+class SPLAT_B_DESC : MSA_3R_SPLAT_DESC_BASE<"splat.b", vsplati8_elt,
+ MSA128BOpnd>;
+class SPLAT_H_DESC : MSA_3R_SPLAT_DESC_BASE<"splat.h", vsplati16_elt,
+ MSA128HOpnd>;
+class SPLAT_W_DESC : MSA_3R_SPLAT_DESC_BASE<"splat.w", vsplati32_elt,
+ MSA128WOpnd>;
+class SPLAT_D_DESC : MSA_3R_SPLAT_DESC_BASE<"splat.d", vsplati64_elt,
+ MSA128DOpnd>;
class SPLATI_B_DESC : MSA_ELM_SPLAT_DESC_BASE<"splati.b", vsplati8_uimm4,
MSA128BOpnd>;
case Intrinsic::mips_slli_d:
return DAG.getNode(ISD::SHL, DL, Op->getValueType(0),
Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_splat_b:
+ case Intrinsic::mips_splat_h:
+ case Intrinsic::mips_splat_w:
+ case Intrinsic::mips_splat_d:
+ // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
+ // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
+ // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
+ // Instead we lower to MipsISD::VSHF and match from there.
+ return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
+ lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1),
+ Op->getOperand(1));
case Intrinsic::mips_splati_b:
case Intrinsic::mips_splati_h:
case Intrinsic::mips_splati_w:
; Test the MSA splat intrinsics that are encoded with the 3R instruction
; format.
-; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | \
+; RUN: FileCheck -check-prefix=MIPS32 %s
@llvm_mips_splat_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
@llvm_mips_splat_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
-define void @llvm_mips_splat_b_test() nounwind {
+define void @llvm_mips_splat_b_test(i32 %a) nounwind {
entry:
%0 = load <16 x i8>* @llvm_mips_splat_b_ARG1
- %1 = tail call <16 x i8> @llvm.mips.splat.b(<16 x i8> %0, i32 3)
+ %1 = tail call <16 x i8> @llvm.mips.splat.b(<16 x i8> %0, i32 %a)
store <16 x i8> %1, <16 x i8>* @llvm_mips_splat_b_RES
ret void
}
declare <16 x i8> @llvm.mips.splat.b(<16 x i8>, i32) nounwind
-; CHECK: llvm_mips_splat_b_test:
-; CHECK: ld.b
-; CHECK: splat.b
-; CHECK: st.b
-; CHECK: .size llvm_mips_splat_b_test
-;
+; MIPS32: llvm_mips_splat_b_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_splat_b_ARG1)(
+; MIPS32-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_splat_b_RES)(
+; MIPS32-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]])
+; MIPS32-DAG: splat.b [[R4:\$w[0-9]+]], [[R3]][$4]
+; MIPS32-DAG: st.b [[R4]], 0([[R2]])
+; MIPS32: .size llvm_mips_splat_b_test
+
@llvm_mips_splat_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
@llvm_mips_splat_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
-define void @llvm_mips_splat_h_test() nounwind {
+define void @llvm_mips_splat_h_test(i32 %a) nounwind {
entry:
%0 = load <8 x i16>* @llvm_mips_splat_h_ARG1
- %1 = tail call <8 x i16> @llvm.mips.splat.h(<8 x i16> %0, i32 3)
+ %1 = tail call <8 x i16> @llvm.mips.splat.h(<8 x i16> %0, i32 %a)
store <8 x i16> %1, <8 x i16>* @llvm_mips_splat_h_RES
ret void
}
declare <8 x i16> @llvm.mips.splat.h(<8 x i16>, i32) nounwind
-; CHECK: llvm_mips_splat_h_test:
-; CHECK: ld.h
-; CHECK: splat.h
-; CHECK: st.h
-; CHECK: .size llvm_mips_splat_h_test
-;
+; MIPS32: llvm_mips_splat_h_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_splat_h_ARG1)(
+; MIPS32-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_splat_h_RES)(
+; MIPS32-DAG: ld.h [[R3:\$w[0-9]+]], 0([[R1]])
+; MIPS32-DAG: splat.h [[R4:\$w[0-9]+]], [[R3]][$4]
+; MIPS32-DAG: st.h [[R4]], 0([[R2]])
+; MIPS32: .size llvm_mips_splat_h_test
+
@llvm_mips_splat_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
@llvm_mips_splat_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
-define void @llvm_mips_splat_w_test() nounwind {
+define void @llvm_mips_splat_w_test(i32 %a) nounwind {
entry:
%0 = load <4 x i32>* @llvm_mips_splat_w_ARG1
- %1 = tail call <4 x i32> @llvm.mips.splat.w(<4 x i32> %0, i32 3)
+ %1 = tail call <4 x i32> @llvm.mips.splat.w(<4 x i32> %0, i32 %a)
store <4 x i32> %1, <4 x i32>* @llvm_mips_splat_w_RES
ret void
}
declare <4 x i32> @llvm.mips.splat.w(<4 x i32>, i32) nounwind
-; CHECK: llvm_mips_splat_w_test:
-; CHECK: ld.w
-; CHECK: splat.w
-; CHECK: st.w
-; CHECK: .size llvm_mips_splat_w_test
-;
+; MIPS32: llvm_mips_splat_w_test:
+; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_splat_w_ARG1)(
+; MIPS32-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_splat_w_RES)(
+; MIPS32-DAG: ld.w [[R3:\$w[0-9]+]], 0([[R1]])
+; MIPS32-DAG: splat.w [[R4:\$w[0-9]+]], [[R3]][$4]
+; MIPS32-DAG: st.w [[R4]], 0([[R2]])
+; MIPS32: .size llvm_mips_splat_w_test
+
@llvm_mips_splat_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
@llvm_mips_splat_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
-define void @llvm_mips_splat_d_test() nounwind {
+define void @llvm_mips_splat_d_test(i32 %a) nounwind {
entry:
%0 = load <2 x i64>* @llvm_mips_splat_d_ARG1
- %1 = tail call <2 x i64> @llvm.mips.splat.d(<2 x i64> %0, i32 3)
+ %1 = tail call <2 x i64> @llvm.mips.splat.d(<2 x i64> %0, i32 %a)
store <2 x i64> %1, <2 x i64>* @llvm_mips_splat_d_RES
ret void
}
declare <2 x i64> @llvm.mips.splat.d(<2 x i64>, i32) nounwind
-; CHECK: llvm_mips_splat_d_test:
-; CHECK: ld.d
-; CHECK: splat.d
-; CHECK: st.d
-; CHECK: .size llvm_mips_splat_d_test
-;
+; MIPS32: llvm_mips_splat_d_test:
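+; FIXME: The body of this test is currently disabled for MIPS32 because the
+; indices are difficult to match: 64-bit values cannot be held in a
+; GPR32. The checks below therefore use the MIPS64 prefix, which the
+; MIPS32 run of FileCheck does not enable, so they are skipped.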
+; MIPS64-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_splat_d_ARG1)(
+; MIPS64-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_splat_d_RES)(
+; MIPS64-DAG: ld.d [[R3:\$w[0-9]+]], 0([[R1]])
+; MIPS64-DAG: splat.d [[R4:\$w[0-9]+]], [[R3]][$4]
+; MIPS64-DAG: st.d [[R4]], 0([[R2]])
+; MIPS32: .size llvm_mips_splat_d_test