setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return LowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::SPLAT_VECTOR:
Tbl2->getOperand(1), Tbl2->getOperand(2), TBLMask});
}
+// Baseline legalization for ZERO_EXTEND_VECTOR_INREG will blend-in zeros,
+// but we don't have an appropriate instruction,
+// so custom-lower it as ZIP1-with-zeros.
+//
+// ZIP1 interleaves the low halves of its two operands, so zipping the source
+// with an all-zeros vector yields (s0, 0, s1, 0, ...) in the narrow element
+// type; reinterpreting each adjacent (element, zero) pair as one wide element
+// via the bitcast gives exactly the in-register zero-extension.
+// NOTE(review): this relies on the lane ordering the bitcast implies --
+// confirm the pattern is still correct for big-endian AArch64.
+SDValue
+AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ SDValue SrcOp = Op.getOperand(0);
+ EVT SrcVT = SrcOp.getValueType();
+ // Result elements must be an exact multiple of the source element width.
+ assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
+ "Unexpected extension factor.");
+ unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
+ // A single ZIP1 only doubles the element width; wider extensions would
+ // need a chain of zips, so fall back to generic legalization for them.
+ // FIXME: support multi-step zipping?
+ if (Scale != 2)
+ return SDValue();
+ SDValue Zeros = DAG.getConstant(0, dl, SrcVT);
+ return DAG.getBitcast(VT,
+ DAG.getNode(AArch64ISD::ZIP1, dl, SrcVT, SrcOp, Zeros));
+}
+
SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
; Check that this pattern is recognized as a VZIP and
; that the vector blend transform does not scramble the pattern.
-; FIXME: we can not recognize generic ZERO_EXTEND_VECTOR_INREG legalization
-; as a zip1.
; CHECK-LABEL: vzipNoBlend:
-; CHECK-NOT: zip1
+; CHECK: zip1
; Interleave the loaded vector with explicit zeros (s0,0,s1,0,...) -- the
; shuffle form a ZERO_EXTEND_VECTOR_INREG legalizes to; the custom lowering
; should select this as a single zip1 against a zeroed register.
define <8 x i8> @vzipNoBlend(ptr %A, ptr %B) nounwind {
%t = load <8 x i8>, ptr %A
%vzip = shufflevector <8 x i8> %t, <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
ret <8 x i8> %vzip
}
-; FIXME: this is identical to @vzipNoBlend
; CHECK-LABEL: vzipStillZExt:
-; CHECK-NOT: zip1
+; CHECK: zip1
define <8 x i8> @vzipStillZExt(ptr %A, ptr %B) nounwind {
%t = load <8 x i8>, ptr %A
%vzip = shufflevector <8 x i8> %t, <8 x i8> <i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <8 x i32> <i32 0, i32 9, i32 1, i32 9, i32 2, i32 9, i32 3, i32 9>