setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
setOperationAction(ISD::MUL, MVT::v8i16, Legal);
+ setOperationAction(ISD::AVGCEILU, MVT::v16i8, Legal);
+ setOperationAction(ISD::AVGCEILU, MVT::v8i16, Legal);
setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
+ if (HasInt256) {
+ setOperationAction(ISD::AVGCEILU, MVT::v16i16, Legal);
+ setOperationAction(ISD::AVGCEILU, MVT::v32i8, Legal);
+ }
setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
+ if (HasBWI) {
+ setOperationAction(ISD::AVGCEILU, MVT::v32i16, Legal);
+ setOperationAction(ISD::AVGCEILU, MVT::v64i8, Legal);
+ }
setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
Results.push_back(Res);
return;
}
- case X86ISD::VPMADDWD:
- case X86ISD::AVG: {
- // Legalize types for X86ISD::AVG/VPMADDWD by widening.
+ case X86ISD::VPMADDWD: {
+ // Legalize types for X86ISD::VPMADDWD by widening.
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
EVT VT = N->getValueType(0);
NODE_NAME_CASE(SCALEF_RND)
NODE_NAME_CASE(SCALEFS)
NODE_NAME_CASE(SCALEFS_RND)
- NODE_NAME_CASE(AVG)
NODE_NAME_CASE(MULHRS)
NODE_NAME_CASE(SINT_TO_FP_RND)
NODE_NAME_CASE(UINT_TO_FP_RND)
bool X86TargetLowering::isCommutativeBinOp(unsigned Opcode) const {
switch (Opcode) {
// TODO: Add more X86ISD opcodes once we have test coverage.
- case X86ISD::AVG:
case X86ISD::PCMPEQ:
case X86ISD::PMULDQ:
case X86ISD::PMULUDQ:
case X86ISD::UNPCKH:
case X86ISD::BLENDI:
// Integer ops.
- case X86ISD::AVG:
case X86ISD::PACKSS:
case X86ISD::PACKUS:
// Horizontal Ops.
/// This function detects the AVG pattern between vectors of unsigned i8/i16,
/// which is c = (a + b + 1) / 2, and replaces this operation with the efficient
-/// X86ISD::AVG instruction.
+/// ISD::AVGCEILU (AVG) instruction.
static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
const SDLoc &DL) {
auto AVGBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
- return DAG.getNode(X86ISD::AVG, DL, Ops[0].getValueType(), Ops);
+ return DAG.getNode(ISD::AVGCEILU, DL, Ops[0].getValueType(), Ops);
};
auto AVGSplitter = [&](std::array<SDValue, 2> Ops) {
SCALEFS,
SCALEFS_RND,
- // Unsigned Integer average.
- AVG,
-
/// Integer horizontal add/sub.
HADD,
HSUB,
HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
SchedWriteVecIMul, HasBWI, 1>, T8PD;
-defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
+defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
SchedWriteVecIMul, HasAVX512, 1>, T8PD;
SDTCisSameAs<2, 1>]>;
def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, [SDNPCommutative]>;
-def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp, [SDNPCommutative]>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
-defm PAVGB : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8,
+defm PAVGB : PDI_binop_all<0xE0, "pavgb", avgceilu, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
-defm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16,
+defm PAVGW : PDI_binop_all<0xE3, "pavgw", avgceilu, v8i16, v16i16,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64,
SchedWriteVecIMul, 1, NoVLX>;
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx2_pavg_b, INTR_TYPE_2OP, X86ISD::AVG, 0),
- X86_INTRINSIC_DATA(avx2_pavg_w, INTR_TYPE_2OP, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx2_pavg_b, INTR_TYPE_2OP, ISD::AVGCEILU, 0),
+ X86_INTRINSIC_DATA(avx2_pavg_w, INTR_TYPE_2OP, ISD::AVGCEILU, 0),
X86_INTRINSIC_DATA(avx2_pblendvb, BLENDV, X86ISD::BLENDV, 0),
X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx512_pavg_b_512, INTR_TYPE_2OP, X86ISD::AVG, 0),
- X86_INTRINSIC_DATA(avx512_pavg_w_512, INTR_TYPE_2OP, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx512_pavg_b_512, INTR_TYPE_2OP, ISD::AVGCEILU, 0),
+ X86_INTRINSIC_DATA(avx512_pavg_w_512, INTR_TYPE_2OP, ISD::AVGCEILU, 0),
X86_INTRINSIC_DATA(avx512_permvar_df_256, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_permvar_df_512, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_permvar_di_256, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(sse2_pavg_b, INTR_TYPE_2OP, X86ISD::AVG, 0),
- X86_INTRINSIC_DATA(sse2_pavg_w, INTR_TYPE_2OP, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(sse2_pavg_b, INTR_TYPE_2OP, ISD::AVGCEILU, 0),
+ X86_INTRINSIC_DATA(sse2_pavg_w, INTR_TYPE_2OP, ISD::AVGCEILU, 0),
X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
define void @avg_v16i8(<16 x i8>* %a, <16 x i8>* %b) nounwind {
; SSE2-LABEL: avg_v16i8:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa (%rsi), %xmm0
-; SSE2-NEXT: pavgb (%rdi), %xmm0
+; SSE2-NEXT: movdqa (%rdi), %xmm0
+; SSE2-NEXT: pavgb (%rsi), %xmm0
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
;
-; AVX-LABEL: avg_v16i8:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa (%rsi), %xmm0
-; AVX-NEXT: vpavgb (%rdi), %xmm0, %xmm0
-; AVX-NEXT: vmovdqu %xmm0, (%rax)
-; AVX-NEXT: retq
+; AVX1-LABEL: avg_v16i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovdqa (%rdi), %xmm0
+; AVX1-NEXT: vpavgb (%rsi), %xmm0, %xmm0
+; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: avg_v16i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovdqa (%rdi), %xmm0
+; AVX2-NEXT: vpavgb (%rsi), %xmm0, %xmm0
+; AVX2-NEXT: vmovdqu %xmm0, (%rax)
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: avg_v16i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovdqa (%rsi), %xmm0
+; AVX512-NEXT: vpavgb (%rdi), %xmm0, %xmm0
+; AVX512-NEXT: vmovdqu %xmm0, (%rax)
+; AVX512-NEXT: retq
%1 = load <16 x i8>, <16 x i8>* %a
%2 = load <16 x i8>, <16 x i8>* %b
%3 = zext <16 x i8> %1 to <16 x i32>
;
; AVX2-LABEL: avg_v32i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa (%rsi), %ymm0
-; AVX2-NEXT: vpavgb (%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa (%rdi), %ymm0
+; AVX2-NEXT: vpavgb (%rsi), %ymm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: avg_v32i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovdqa (%rsi), %ymm0
-; AVX512-NEXT: vpavgb (%rdi), %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa (%rdi), %ymm0
+; AVX512-NEXT: vpavgb (%rsi), %ymm0, %ymm0
; AVX512-NEXT: vmovdqu %ymm0, (%rax)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512BW-LABEL: avg_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa64 (%rsi), %zmm0
-; AVX512BW-NEXT: vpavgb (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
+; AVX512BW-NEXT: vpavgb (%rsi), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rax)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
define void @avg_v8i16(<8 x i16>* %a, <8 x i16>* %b) nounwind {
; SSE2-LABEL: avg_v8i16:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa (%rsi), %xmm0
-; SSE2-NEXT: pavgw (%rdi), %xmm0
+; SSE2-NEXT: movdqa (%rdi), %xmm0
+; SSE2-NEXT: pavgw (%rsi), %xmm0
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
;
; AVX-LABEL: avg_v8i16:
; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa (%rsi), %xmm0
-; AVX-NEXT: vpavgw (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovdqa (%rdi), %xmm0
+; AVX-NEXT: vpavgw (%rsi), %xmm0, %xmm0
; AVX-NEXT: vmovdqu %xmm0, (%rax)
; AVX-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a
;
; AVX2-LABEL: avg_v16i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa (%rsi), %ymm0
-; AVX2-NEXT: vpavgw (%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa (%rdi), %ymm0
+; AVX2-NEXT: vpavgw (%rsi), %ymm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512BW-LABEL: avg_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa64 (%rsi), %zmm0
-; AVX512BW-NEXT: vpavgw (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
+; AVX512BW-NEXT: vpavgw (%rsi), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rax)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-LABEL: avg_v40i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa 64(%rsi), %xmm0
-; AVX512F-NEXT: vpavgw 64(%rdi), %xmm0, %xmm0
-; AVX512F-NEXT: vmovdqa (%rsi), %ymm1
-; AVX512F-NEXT: vmovdqa 32(%rsi), %ymm2
-; AVX512F-NEXT: vpavgw (%rdi), %ymm1, %ymm1
-; AVX512F-NEXT: vpavgw 32(%rdi), %ymm2, %ymm2
-; AVX512F-NEXT: vmovdqu %ymm2, (%rax)
+; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
+; AVX512F-NEXT: vmovdqa 32(%rsi), %ymm1
+; AVX512F-NEXT: vpavgw (%rdi), %ymm0, %ymm0
+; AVX512F-NEXT: vpavgw 32(%rdi), %ymm1, %ymm1
+; AVX512F-NEXT: vmovdqa 64(%rsi), %xmm2
+; AVX512F-NEXT: vpavgw 64(%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vmovdqu %ymm1, (%rax)
-; AVX512F-NEXT: vmovdqu %xmm0, (%rax)
+; AVX512F-NEXT: vmovdqu %ymm0, (%rax)
+; AVX512F-NEXT: vmovdqu %xmm2, (%rax)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
;
; AVX-LABEL: PR52131_pavg_chain:
; AVX: # %bb.0:
-; AVX-NEXT: vpavgw %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpavgw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpavgw %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
%i = zext <8 x i16> %a to <8 x i32>
define dso_local void @avg_v64i8_512(<64 x i8>* %a, <64 x i8>* %b) "min-legal-vector-width"="512" {
; CHECK-LABEL: avg_v64i8_512:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa64 (%rsi), %zmm0
-; CHECK-NEXT: vpavgb (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0
+; CHECK-NEXT: vpavgb (%rsi), %zmm0, %zmm0
; CHECK-NEXT: vmovdqu64 %zmm0, (%rax)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq