From: Craig Topper Date: Sun, 9 Dec 2018 18:02:37 +0000 (+0000) Subject: [X86] If the carry input to an addcarry/subborrow intrinsic is known to be 0, emit... X-Git-Tag: llvmorg-8.0.0-rc1~2505 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2b09d17d93d8a3cdc98747a4d2d4d6be5840087f;p=platform%2Fupstream%2Fllvm.git [X86] If the carry input to an addcarry/subborrow intrinsic is known to be 0, emit a flag setting ADD/SUB instead of ADC/SBB. Previously we had to take the carry in and add -1 to it to set the carry flag so we could use it with ADC/SBB. But if we know it's 0 then we don't need to bother. This should go a long way towards fixing PR24545. llvm-svn: 348727 --- diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4590980..a9bc93f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -21932,10 +21932,19 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case ADX: { SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32); SDVTList VTs = DAG.getVTList(Op.getOperand(2).getValueType(), MVT::i32); - SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(1), - DAG.getConstant(-1, dl, MVT::i8)); - SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(2), - Op.getOperand(3), GenCF.getValue(1)); + + SDValue Res; + // If the carry in is zero, then we should just use ADD/SUB instead of + // ADC/SBB. 
+ if (isNullConstant(Op.getOperand(1))) { + Res = DAG.getNode(IntrData->Opc1, dl, VTs, Op.getOperand(2), + Op.getOperand(3)); + } else { + SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(1), + DAG.getConstant(-1, dl, MVT::i8)); + Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(2), + Op.getOperand(3), GenCF.getValue(1)); + } SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG); SDValue Results[] = { SetCC, Res }; return DAG.getMergeValues(Results, dl); diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 252d648..e3e2961 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -286,10 +286,10 @@ static const IntrinsicData* getIntrinsicWithChain(unsigned IntNo) { * the alphabetical order. */ static const IntrinsicData IntrinsicsWithoutChain[] = { - X86_INTRINSIC_DATA(addcarry_u32, ADX, X86ISD::ADC, 0), - X86_INTRINSIC_DATA(addcarry_u64, ADX, X86ISD::ADC, 0), - X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0), - X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0), + X86_INTRINSIC_DATA(addcarry_u32, ADX, X86ISD::ADC, X86ISD::ADD), + X86_INTRINSIC_DATA(addcarry_u64, ADX, X86ISD::ADC, X86ISD::ADD), + X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, X86ISD::ADD), + X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, X86ISD::ADD), X86_INTRINSIC_DATA(avx_addsub_pd_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0), X86_INTRINSIC_DATA(avx_addsub_ps_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0), X86_INTRINSIC_DATA(avx_cmp_pd_256, INTR_TYPE_3OP, X86ISD::CMPP, 0), @@ -1223,8 +1223,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw_128, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0), X86_INTRINSIC_DATA(ssse3_pmul_hr_sw_128, INTR_TYPE_2OP, X86ISD::MULHRS, 0), X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0), - X86_INTRINSIC_DATA(subborrow_u32, ADX, X86ISD::SBB, 0), - 
X86_INTRINSIC_DATA(subborrow_u64, ADX, X86ISD::SBB, 0), + X86_INTRINSIC_DATA(subborrow_u32, ADX, X86ISD::SBB, X86ISD::SUB), + X86_INTRINSIC_DATA(subborrow_u64, ADX, X86ISD::SBB, X86ISD::SUB), X86_INTRINSIC_DATA(tbm_bextri_u32, INTR_TYPE_2OP, X86ISD::BEXTR, 0), X86_INTRINSIC_DATA(tbm_bextri_u64, INTR_TYPE_2OP, X86ISD::BEXTR, 0), X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0), diff --git a/llvm/test/CodeGen/X86/adx-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/adx-intrinsics-upgrade.ll index fcb7165..34f8ff8 100644 --- a/llvm/test/CodeGen/X86/adx-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/adx-intrinsics-upgrade.ll @@ -90,13 +90,11 @@ define i8 @test_subborrow_u64(i8 %c, i64 %a, i64 %b, i8* %ptr) { define i32 @load_crash(i64* nocapture readonly %a, i64* nocapture readonly %b, i64* %res) { ; CHECK-LABEL: load_crash: ; CHECK: ## %bb.0: -; CHECK-NEXT: movq (%rdi), %rax ## encoding: [0x48,0x8b,0x07] -; CHECK-NEXT: xorl %ecx, %ecx ## encoding: [0x31,0xc9] -; CHECK-NEXT: addb $-1, %cl ## encoding: [0x80,0xc1,0xff] -; CHECK-NEXT: adcq (%rsi), %rax ## encoding: [0x48,0x13,0x06] -; CHECK-NEXT: setb %cl ## encoding: [0x0f,0x92,0xc1] -; CHECK-NEXT: movq %rax, (%rdx) ## encoding: [0x48,0x89,0x02] -; CHECK-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1] +; CHECK-NEXT: movq (%rdi), %rcx ## encoding: [0x48,0x8b,0x0f] +; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] +; CHECK-NEXT: addq (%rsi), %rcx ## encoding: [0x48,0x03,0x0e] +; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0] +; CHECK-NEXT: movq %rcx, (%rdx) ## encoding: [0x48,0x89,0x0a] ; CHECK-NEXT: retq ## encoding: [0xc3] %1 = load i64, i64* %a, align 8 %2 = load i64, i64* %b, align 8 @@ -111,9 +109,7 @@ define void @allzeros() { ; CHECK-LABEL: allzeros: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; CHECK-NEXT: addb $-1, %al ## encoding: [0x04,0xff] -; CHECK-NEXT: sbbq %rax, %rax ## encoding: [0x48,0x19,0xc0] -; CHECK-NEXT: andl $1, %eax ## 
encoding: [0x83,0xe0,0x01] +; CHECK-NEXT: addq $0, %rax ## encoding: [0x48,0x83,0xc0,0x00] ; CHECK-NEXT: movq %rax, 0 ## encoding: [0x48,0x89,0x04,0x25,0x00,0x00,0x00,0x00] ; CHECK-NEXT: retq ## encoding: [0xc3] entry: diff --git a/llvm/test/CodeGen/X86/adx-intrinsics.ll b/llvm/test/CodeGen/X86/adx-intrinsics.ll index ba820d0..d6b75e1 100644 --- a/llvm/test/CodeGen/X86/adx-intrinsics.ll +++ b/llvm/test/CodeGen/X86/adx-intrinsics.ll @@ -148,13 +148,11 @@ define i8 @test_subborrow_u64(i8 %c, i64 %a, i64 %b, i8* %ptr) { define i32 @load_crash(i64* nocapture readonly %a, i64* nocapture readonly %b, i64* %res) { ; CHECK-LABEL: load_crash: ; CHECK: ## %bb.0: -; CHECK-NEXT: movq (%rdi), %rax ## encoding: [0x48,0x8b,0x07] -; CHECK-NEXT: xorl %ecx, %ecx ## encoding: [0x31,0xc9] -; CHECK-NEXT: addb $-1, %cl ## encoding: [0x80,0xc1,0xff] -; CHECK-NEXT: adcq (%rsi), %rax ## encoding: [0x48,0x13,0x06] -; CHECK-NEXT: setb %cl ## encoding: [0x0f,0x92,0xc1] -; CHECK-NEXT: movq %rax, (%rdx) ## encoding: [0x48,0x89,0x02] -; CHECK-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1] +; CHECK-NEXT: movq (%rdi), %rcx ## encoding: [0x48,0x8b,0x0f] +; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] +; CHECK-NEXT: addq (%rsi), %rcx ## encoding: [0x48,0x03,0x0e] +; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0] +; CHECK-NEXT: movq %rcx, (%rdx) ## encoding: [0x48,0x89,0x0a] ; CHECK-NEXT: retq ## encoding: [0xc3] %1 = load i64, i64* %a, align 8 %2 = load i64, i64* %b, align 8 @@ -173,9 +171,7 @@ define void @allzeros() { ; CHECK-LABEL: allzeros: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; CHECK-NEXT: addb $-1, %al ## encoding: [0x04,0xff] -; CHECK-NEXT: sbbq %rax, %rax ## encoding: [0x48,0x19,0xc0] -; CHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01] +; CHECK-NEXT: addq $0, %rax ## encoding: [0x48,0x83,0xc0,0x00] ; CHECK-NEXT: movq %rax, 0 ## encoding: [0x48,0x89,0x04,0x25,0x00,0x00,0x00,0x00] ; CHECK-NEXT: retq ## encoding: [0xc3] 
entry: