From 6d4ebada797f22f1bdb9bf07f7e51671d6af458c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Feb 2020 15:13:42 -0500 Subject: [PATCH] AMDGPU: Use conditions directly in division expansion This was creating a select on true/false values, and then comparing that later. This produced more work for later combines, which can be avoided by just using the boolean values. This was copied from the original DAG expansion, which also has the same problem. This doesn't have an observable change using SelectionDAG, but since GlobalISel is missing these optimizations, the final code was noticeably longer. --- llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 29 +- .../amdgpu-codegenprepare-fold-binop-select.ll | 42 +- .../CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll | 1628 ++++++++++---------- 3 files changed, 802 insertions(+), 897 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index adf2648..8840b0a1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -987,7 +987,6 @@ Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder, ConstantInt *Zero = Builder.getInt32(0); ConstantInt *One = Builder.getInt32(1); - ConstantInt *MinusOne = Builder.getInt32(~0); Value *Sign = nullptr; if (IsSigned) { @@ -1048,18 +1047,14 @@ Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder, // Remainder = Num - Num_S_Remainder Value *Remainder = Builder.CreateSub(Num, Num_S_Remainder); - // Remainder_GE_Den = (Remainder >= Den ? -1 : 0) - Value *Rem_GE_Den_CC = Builder.CreateICmpUGE(Remainder, Den); - Value *Remainder_GE_Den = Builder.CreateSelect(Rem_GE_Den_CC, MinusOne, Zero); + // Remainder_GE_Den = Remainder >= Den; + Value *Remainder_GE_Den = Builder.CreateICmpUGE(Remainder, Den); - // Remainder_GE_Zero = (Num >= Num_S_Remainder ? 
-1 : 0) - Value *Num_GE_Num_S_Rem_CC = Builder.CreateICmpUGE(Num, Num_S_Remainder); - Value *Remainder_GE_Zero = Builder.CreateSelect(Num_GE_Num_S_Rem_CC, - MinusOne, Zero); + // Remainder_GE_Zero = Num >= Num_S_Remainder + Value *Remainder_GE_Zero = Builder.CreateICmpUGE(Num, Num_S_Remainder); // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero Value *Tmp1 = Builder.CreateAnd(Remainder_GE_Den, Remainder_GE_Zero); - Value *Tmp1_0_CC = Builder.CreateICmpEQ(Tmp1, Zero); Value *Res; if (IsDiv) { @@ -1069,11 +1064,11 @@ Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder, // Quotient_S_One = Quotient - 1 Value *Quotient_S_One = Builder.CreateSub(Quotient, One); - // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One) - Value *Div = Builder.CreateSelect(Tmp1_0_CC, Quotient, Quotient_A_One); + // Div = (Tmp1 ? Quotient_A_One : Quotient) + Value *Div = Builder.CreateSelect(Tmp1, Quotient_A_One, Quotient); - // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div) - Res = Builder.CreateSelect(Num_GE_Num_S_Rem_CC, Div, Quotient_S_One); + // Div = (Remainder_GE_Zero ? Div : Quotient_S_One) + Res = Builder.CreateSelect(Remainder_GE_Zero, Div, Quotient_S_One); } else { // Remainder_S_Den = Remainder - Den Value *Remainder_S_Den = Builder.CreateSub(Remainder, Den); @@ -1081,11 +1076,11 @@ Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder, // Remainder_A_Den = Remainder + Den Value *Remainder_A_Den = Builder.CreateAdd(Remainder, Den); - // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den) - Value *Rem = Builder.CreateSelect(Tmp1_0_CC, Remainder, Remainder_S_Den); + // Rem = (Tmp1 ? Remainder_S_Den : Remainder) + Value *Rem = Builder.CreateSelect(Tmp1, Remainder_S_Den, Remainder); - // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) - Res = Builder.CreateSelect(Num_GE_Num_S_Rem_CC, Rem, Remainder_A_Den); + // Rem = (Remainder_GE_Zero ? 
Rem : Remainder_A_Den) + Res = Builder.CreateSelect(Remainder_GE_Zero, Rem, Remainder_A_Den); } if (IsSigned) { diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll index fbe9f52..1b6a1a7 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll @@ -121,18 +121,15 @@ define i32 @select_sdiv_lhs_opaque_const0_i32(i1 %cond) { ; IR-NEXT: [[TMP32:%.*]] = mul i32 [[TMP31]], [[TMP4]] ; IR-NEXT: [[TMP33:%.*]] = sub i32 1000000, [[TMP32]] ; IR-NEXT: [[TMP34:%.*]] = icmp uge i32 [[TMP33]], [[TMP4]] -; IR-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 -1, i32 0 -; IR-NEXT: [[TMP36:%.*]] = icmp uge i32 1000000, [[TMP32]] -; IR-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 -1, i32 0 -; IR-NEXT: [[TMP38:%.*]] = and i32 [[TMP35]], [[TMP37]] -; IR-NEXT: [[TMP39:%.*]] = icmp eq i32 [[TMP38]], 0 -; IR-NEXT: [[TMP40:%.*]] = add i32 [[TMP31]], 1 -; IR-NEXT: [[TMP41:%.*]] = sub i32 [[TMP31]], 1 -; IR-NEXT: [[TMP42:%.*]] = select i1 [[TMP39]], i32 [[TMP31]], i32 [[TMP40]] -; IR-NEXT: [[TMP43:%.*]] = select i1 [[TMP36]], i32 [[TMP42]], i32 [[TMP41]] -; IR-NEXT: [[TMP44:%.*]] = xor i32 [[TMP43]], [[TMP2]] -; IR-NEXT: [[TMP45:%.*]] = sub i32 [[TMP44]], [[TMP2]] -; IR-NEXT: ret i32 [[TMP45]] +; IR-NEXT: [[TMP35:%.*]] = icmp uge i32 1000000, [[TMP32]] +; IR-NEXT: [[TMP36:%.*]] = and i1 [[TMP34]], [[TMP35]] +; IR-NEXT: [[TMP37:%.*]] = add i32 [[TMP31]], 1 +; IR-NEXT: [[TMP38:%.*]] = sub i32 [[TMP31]], 1 +; IR-NEXT: [[TMP39:%.*]] = select i1 [[TMP36]], i32 [[TMP37]], i32 [[TMP31]] +; IR-NEXT: [[TMP40:%.*]] = select i1 [[TMP35]], i32 [[TMP39]], i32 [[TMP38]] +; IR-NEXT: [[TMP41:%.*]] = xor i32 [[TMP40]], [[TMP2]] +; IR-NEXT: [[TMP42:%.*]] = sub i32 [[TMP41]], [[TMP2]] +; IR-NEXT: ret i32 [[TMP42]] ; ; GCN-LABEL: select_sdiv_lhs_opaque_const0_i32: ; GCN: ; %bb.0: @@ -219,18 +216,15 @@ define i32 
@select_sdiv_lhs_opaque_const1_i32(i1 %cond) { ; IR-NEXT: [[TMP32:%.*]] = mul i32 [[TMP31]], [[TMP4]] ; IR-NEXT: [[TMP33:%.*]] = sub i32 1000000, [[TMP32]] ; IR-NEXT: [[TMP34:%.*]] = icmp uge i32 [[TMP33]], [[TMP4]] -; IR-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 -1, i32 0 -; IR-NEXT: [[TMP36:%.*]] = icmp uge i32 1000000, [[TMP32]] -; IR-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 -1, i32 0 -; IR-NEXT: [[TMP38:%.*]] = and i32 [[TMP35]], [[TMP37]] -; IR-NEXT: [[TMP39:%.*]] = icmp eq i32 [[TMP38]], 0 -; IR-NEXT: [[TMP40:%.*]] = add i32 [[TMP31]], 1 -; IR-NEXT: [[TMP41:%.*]] = sub i32 [[TMP31]], 1 -; IR-NEXT: [[TMP42:%.*]] = select i1 [[TMP39]], i32 [[TMP31]], i32 [[TMP40]] -; IR-NEXT: [[TMP43:%.*]] = select i1 [[TMP36]], i32 [[TMP42]], i32 [[TMP41]] -; IR-NEXT: [[TMP44:%.*]] = xor i32 [[TMP43]], [[TMP2]] -; IR-NEXT: [[TMP45:%.*]] = sub i32 [[TMP44]], [[TMP2]] -; IR-NEXT: ret i32 [[TMP45]] +; IR-NEXT: [[TMP35:%.*]] = icmp uge i32 1000000, [[TMP32]] +; IR-NEXT: [[TMP36:%.*]] = and i1 [[TMP34]], [[TMP35]] +; IR-NEXT: [[TMP37:%.*]] = add i32 [[TMP31]], 1 +; IR-NEXT: [[TMP38:%.*]] = sub i32 [[TMP31]], 1 +; IR-NEXT: [[TMP39:%.*]] = select i1 [[TMP36]], i32 [[TMP37]], i32 [[TMP31]] +; IR-NEXT: [[TMP40:%.*]] = select i1 [[TMP35]], i32 [[TMP39]], i32 [[TMP38]] +; IR-NEXT: [[TMP41:%.*]] = xor i32 [[TMP40]], [[TMP2]] +; IR-NEXT: [[TMP42:%.*]] = sub i32 [[TMP41]], [[TMP2]] +; IR-NEXT: ret i32 [[TMP42]] ; ; GCN-LABEL: select_sdiv_lhs_opaque_const1_i32: ; GCN: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll index 6ee3964..daf56e4 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -36,16 +36,13 @@ define amdgpu_kernel void @udiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; CHECK-NEXT: [[TMP29:%.*]] = mul i32 [[TMP28]], [[Y]] ; CHECK-NEXT: [[TMP30:%.*]] = sub i32 [[X]], [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] 
= icmp uge i32 [[TMP30]], [[Y]] -; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP33:%.*]] = icmp uge i32 [[X]], [[TMP29]] -; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP32]], [[TMP34]] -; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP35]], 0 -; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP28]], 1 -; CHECK-NEXT: [[TMP38:%.*]] = sub i32 [[TMP28]], 1 -; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP36]], i32 [[TMP28]], i32 [[TMP37]] -; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP33]], i32 [[TMP39]], i32 [[TMP38]] -; CHECK-NEXT: store i32 [[TMP40]], i32 addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP32:%.*]] = icmp uge i32 [[X]], [[TMP29]] +; CHECK-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = add i32 [[TMP28]], 1 +; CHECK-NEXT: [[TMP35:%.*]] = sub i32 [[TMP28]], 1 +; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP33]], i32 [[TMP34]], i32 [[TMP28]] +; CHECK-NEXT: [[TMP37:%.*]] = select i1 [[TMP32]], i32 [[TMP36]], i32 [[TMP35]] +; CHECK-NEXT: store i32 [[TMP37]], i32 addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: udiv_i32: @@ -118,16 +115,13 @@ define amdgpu_kernel void @urem_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; CHECK-NEXT: [[TMP29:%.*]] = mul i32 [[TMP28]], [[Y]] ; CHECK-NEXT: [[TMP30:%.*]] = sub i32 [[X]], [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = icmp uge i32 [[TMP30]], [[Y]] -; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP33:%.*]] = icmp uge i32 [[X]], [[TMP29]] -; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP32]], [[TMP34]] -; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP35]], 0 -; CHECK-NEXT: [[TMP37:%.*]] = sub i32 [[TMP30]], [[Y]] -; CHECK-NEXT: [[TMP38:%.*]] = add i32 [[TMP30]], [[Y]] -; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP36]], i32 [[TMP30]], i32 [[TMP37]] -; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP33]], 
i32 [[TMP39]], i32 [[TMP38]] -; CHECK-NEXT: store i32 [[TMP40]], i32 addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP32:%.*]] = icmp uge i32 [[X]], [[TMP29]] +; CHECK-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = sub i32 [[TMP30]], [[Y]] +; CHECK-NEXT: [[TMP35:%.*]] = add i32 [[TMP30]], [[Y]] +; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP33]], i32 [[TMP34]], i32 [[TMP30]] +; CHECK-NEXT: [[TMP37:%.*]] = select i1 [[TMP32]], i32 [[TMP36]], i32 [[TMP35]] +; CHECK-NEXT: store i32 [[TMP37]], i32 addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: urem_i32: @@ -207,18 +201,15 @@ define amdgpu_kernel void @sdiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; CHECK-NEXT: [[TMP36:%.*]] = mul i32 [[TMP35]], [[TMP7]] ; CHECK-NEXT: [[TMP37:%.*]] = sub i32 [[TMP6]], [[TMP36]] ; CHECK-NEXT: [[TMP38:%.*]] = icmp uge i32 [[TMP37]], [[TMP7]] -; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP40:%.*]] = icmp uge i32 [[TMP6]], [[TMP36]] -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP39]], [[TMP41]] -; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[TMP42]], 0 -; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP35]], 1 -; CHECK-NEXT: [[TMP45:%.*]] = sub i32 [[TMP35]], 1 -; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP43]], i32 [[TMP35]], i32 [[TMP44]] -; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP40]], i32 [[TMP46]], i32 [[TMP45]] -; CHECK-NEXT: [[TMP48:%.*]] = xor i32 [[TMP47]], [[TMP3]] -; CHECK-NEXT: [[TMP49:%.*]] = sub i32 [[TMP48]], [[TMP3]] -; CHECK-NEXT: store i32 [[TMP49]], i32 addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP39:%.*]] = icmp uge i32 [[TMP6]], [[TMP36]] +; CHECK-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = add i32 [[TMP35]], 1 +; CHECK-NEXT: [[TMP42:%.*]] = sub i32 [[TMP35]], 1 +; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP40]], i32 [[TMP41]], i32 [[TMP35]] +; CHECK-NEXT: [[TMP44:%.*]] = select i1 
[[TMP39]], i32 [[TMP43]], i32 [[TMP42]] +; CHECK-NEXT: [[TMP45:%.*]] = xor i32 [[TMP44]], [[TMP3]] +; CHECK-NEXT: [[TMP46:%.*]] = sub i32 [[TMP45]], [[TMP3]] +; CHECK-NEXT: store i32 [[TMP46]], i32 addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: sdiv_i32: @@ -306,18 +297,15 @@ define amdgpu_kernel void @srem_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; CHECK-NEXT: [[TMP35:%.*]] = mul i32 [[TMP34]], [[TMP6]] ; CHECK-NEXT: [[TMP36:%.*]] = sub i32 [[TMP5]], [[TMP35]] ; CHECK-NEXT: [[TMP37:%.*]] = icmp uge i32 [[TMP36]], [[TMP6]] -; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP39:%.*]] = icmp uge i32 [[TMP5]], [[TMP35]] -; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP38]], [[TMP40]] -; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[TMP41]], 0 -; CHECK-NEXT: [[TMP43:%.*]] = sub i32 [[TMP36]], [[TMP6]] -; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP36]], [[TMP6]] -; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP42]], i32 [[TMP36]], i32 [[TMP43]] -; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP39]], i32 [[TMP45]], i32 [[TMP44]] -; CHECK-NEXT: [[TMP47:%.*]] = xor i32 [[TMP46]], [[TMP1]] -; CHECK-NEXT: [[TMP48:%.*]] = sub i32 [[TMP47]], [[TMP1]] -; CHECK-NEXT: store i32 [[TMP48]], i32 addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP38:%.*]] = icmp uge i32 [[TMP5]], [[TMP35]] +; CHECK-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = sub i32 [[TMP36]], [[TMP6]] +; CHECK-NEXT: [[TMP41:%.*]] = add i32 [[TMP36]], [[TMP6]] +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP39]], i32 [[TMP40]], i32 [[TMP36]] +; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP38]], i32 [[TMP42]], i32 [[TMP41]] +; CHECK-NEXT: [[TMP44:%.*]] = xor i32 [[TMP43]], [[TMP1]] +; CHECK-NEXT: [[TMP45:%.*]] = sub i32 [[TMP44]], [[TMP1]] +; CHECK-NEXT: store i32 [[TMP45]], i32 addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: srem_i32: @@ -827,146 +815,134 @@ define 
amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], [[TMP2]] ; CHECK-NEXT: [[TMP32:%.*]] = sub i32 [[TMP1]], [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = icmp uge i32 [[TMP32]], [[TMP2]] -; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP35:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] -; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP34]], [[TMP36]] -; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[TMP37]], 0 -; CHECK-NEXT: [[TMP39:%.*]] = add i32 [[TMP30]], 1 -; CHECK-NEXT: [[TMP40:%.*]] = sub i32 [[TMP30]], 1 -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP38]], i32 [[TMP30]], i32 [[TMP39]] -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], i32 [[TMP41]], i32 [[TMP40]] -; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> undef, i32 [[TMP42]], i64 0 -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i32> [[X]], i64 1 -; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i32> [[Y]], i64 1 -; CHECK-NEXT: [[TMP46:%.*]] = uitofp i32 [[TMP45]] to float -; CHECK-NEXT: [[TMP47:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP46]]) -; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP47]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP49:%.*]] = fptoui float [[TMP48]] to i32 -; CHECK-NEXT: [[TMP50:%.*]] = zext i32 [[TMP49]] to i64 -; CHECK-NEXT: [[TMP51:%.*]] = zext i32 [[TMP45]] to i64 -; CHECK-NEXT: [[TMP52:%.*]] = mul i64 [[TMP50]], [[TMP51]] -; CHECK-NEXT: [[TMP53:%.*]] = trunc i64 [[TMP52]] to i32 -; CHECK-NEXT: [[TMP54:%.*]] = lshr i64 [[TMP52]], 32 -; CHECK-NEXT: [[TMP55:%.*]] = trunc i64 [[TMP54]] to i32 -; CHECK-NEXT: [[TMP56:%.*]] = sub i32 0, [[TMP53]] -; CHECK-NEXT: [[TMP57:%.*]] = icmp eq i32 [[TMP55]], 0 -; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 [[TMP53]] -; CHECK-NEXT: [[TMP59:%.*]] = zext i32 [[TMP58]] to i64 -; CHECK-NEXT: [[TMP60:%.*]] = zext i32 [[TMP49]] to i64 -; CHECK-NEXT: 
[[TMP61:%.*]] = mul i64 [[TMP59]], [[TMP60]] -; CHECK-NEXT: [[TMP62:%.*]] = trunc i64 [[TMP61]] to i32 -; CHECK-NEXT: [[TMP63:%.*]] = lshr i64 [[TMP61]], 32 -; CHECK-NEXT: [[TMP64:%.*]] = trunc i64 [[TMP63]] to i32 -; CHECK-NEXT: [[TMP65:%.*]] = add i32 [[TMP49]], [[TMP64]] -; CHECK-NEXT: [[TMP66:%.*]] = sub i32 [[TMP49]], [[TMP64]] -; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP57]], i32 [[TMP65]], i32 [[TMP66]] -; CHECK-NEXT: [[TMP68:%.*]] = zext i32 [[TMP67]] to i64 -; CHECK-NEXT: [[TMP69:%.*]] = zext i32 [[TMP44]] to i64 -; CHECK-NEXT: [[TMP70:%.*]] = mul i64 [[TMP68]], [[TMP69]] -; CHECK-NEXT: [[TMP71:%.*]] = trunc i64 [[TMP70]] to i32 -; CHECK-NEXT: [[TMP72:%.*]] = lshr i64 [[TMP70]], 32 -; CHECK-NEXT: [[TMP73:%.*]] = trunc i64 [[TMP72]] to i32 -; CHECK-NEXT: [[TMP74:%.*]] = mul i32 [[TMP73]], [[TMP45]] -; CHECK-NEXT: [[TMP75:%.*]] = sub i32 [[TMP44]], [[TMP74]] -; CHECK-NEXT: [[TMP76:%.*]] = icmp uge i32 [[TMP75]], [[TMP45]] -; CHECK-NEXT: [[TMP77:%.*]] = select i1 [[TMP76]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP78:%.*]] = icmp uge i32 [[TMP44]], [[TMP74]] -; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP80:%.*]] = and i32 [[TMP77]], [[TMP79]] -; CHECK-NEXT: [[TMP81:%.*]] = icmp eq i32 [[TMP80]], 0 -; CHECK-NEXT: [[TMP82:%.*]] = add i32 [[TMP73]], 1 -; CHECK-NEXT: [[TMP83:%.*]] = sub i32 [[TMP73]], 1 -; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP81]], i32 [[TMP73]], i32 [[TMP82]] -; CHECK-NEXT: [[TMP85:%.*]] = select i1 [[TMP78]], i32 [[TMP84]], i32 [[TMP83]] -; CHECK-NEXT: [[TMP86:%.*]] = insertelement <4 x i32> [[TMP43]], i32 [[TMP85]], i64 1 -; CHECK-NEXT: [[TMP87:%.*]] = extractelement <4 x i32> [[X]], i64 2 -; CHECK-NEXT: [[TMP88:%.*]] = extractelement <4 x i32> [[Y]], i64 2 -; CHECK-NEXT: [[TMP89:%.*]] = uitofp i32 [[TMP88]] to float -; CHECK-NEXT: [[TMP90:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP89]]) -; CHECK-NEXT: [[TMP91:%.*]] = fmul fast float [[TMP90]], 0x41F0000000000000 -; CHECK-NEXT: 
[[TMP92:%.*]] = fptoui float [[TMP91]] to i32 -; CHECK-NEXT: [[TMP93:%.*]] = zext i32 [[TMP92]] to i64 -; CHECK-NEXT: [[TMP94:%.*]] = zext i32 [[TMP88]] to i64 -; CHECK-NEXT: [[TMP95:%.*]] = mul i64 [[TMP93]], [[TMP94]] -; CHECK-NEXT: [[TMP96:%.*]] = trunc i64 [[TMP95]] to i32 -; CHECK-NEXT: [[TMP97:%.*]] = lshr i64 [[TMP95]], 32 -; CHECK-NEXT: [[TMP98:%.*]] = trunc i64 [[TMP97]] to i32 -; CHECK-NEXT: [[TMP99:%.*]] = sub i32 0, [[TMP96]] -; CHECK-NEXT: [[TMP100:%.*]] = icmp eq i32 [[TMP98]], 0 -; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP100]], i32 [[TMP99]], i32 [[TMP96]] -; CHECK-NEXT: [[TMP102:%.*]] = zext i32 [[TMP101]] to i64 -; CHECK-NEXT: [[TMP103:%.*]] = zext i32 [[TMP92]] to i64 -; CHECK-NEXT: [[TMP104:%.*]] = mul i64 [[TMP102]], [[TMP103]] -; CHECK-NEXT: [[TMP105:%.*]] = trunc i64 [[TMP104]] to i32 -; CHECK-NEXT: [[TMP106:%.*]] = lshr i64 [[TMP104]], 32 -; CHECK-NEXT: [[TMP107:%.*]] = trunc i64 [[TMP106]] to i32 -; CHECK-NEXT: [[TMP108:%.*]] = add i32 [[TMP92]], [[TMP107]] -; CHECK-NEXT: [[TMP109:%.*]] = sub i32 [[TMP92]], [[TMP107]] -; CHECK-NEXT: [[TMP110:%.*]] = select i1 [[TMP100]], i32 [[TMP108]], i32 [[TMP109]] -; CHECK-NEXT: [[TMP111:%.*]] = zext i32 [[TMP110]] to i64 -; CHECK-NEXT: [[TMP112:%.*]] = zext i32 [[TMP87]] to i64 -; CHECK-NEXT: [[TMP113:%.*]] = mul i64 [[TMP111]], [[TMP112]] -; CHECK-NEXT: [[TMP114:%.*]] = trunc i64 [[TMP113]] to i32 -; CHECK-NEXT: [[TMP115:%.*]] = lshr i64 [[TMP113]], 32 -; CHECK-NEXT: [[TMP116:%.*]] = trunc i64 [[TMP115]] to i32 -; CHECK-NEXT: [[TMP117:%.*]] = mul i32 [[TMP116]], [[TMP88]] -; CHECK-NEXT: [[TMP118:%.*]] = sub i32 [[TMP87]], [[TMP117]] -; CHECK-NEXT: [[TMP119:%.*]] = icmp uge i32 [[TMP118]], [[TMP88]] -; CHECK-NEXT: [[TMP120:%.*]] = select i1 [[TMP119]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP121:%.*]] = icmp uge i32 [[TMP87]], [[TMP117]] -; CHECK-NEXT: [[TMP122:%.*]] = select i1 [[TMP121]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP123:%.*]] = and i32 [[TMP120]], [[TMP122]] -; CHECK-NEXT: [[TMP124:%.*]] = icmp eq 
i32 [[TMP123]], 0 -; CHECK-NEXT: [[TMP125:%.*]] = add i32 [[TMP116]], 1 -; CHECK-NEXT: [[TMP126:%.*]] = sub i32 [[TMP116]], 1 -; CHECK-NEXT: [[TMP127:%.*]] = select i1 [[TMP124]], i32 [[TMP116]], i32 [[TMP125]] -; CHECK-NEXT: [[TMP128:%.*]] = select i1 [[TMP121]], i32 [[TMP127]], i32 [[TMP126]] -; CHECK-NEXT: [[TMP129:%.*]] = insertelement <4 x i32> [[TMP86]], i32 [[TMP128]], i64 2 -; CHECK-NEXT: [[TMP130:%.*]] = extractelement <4 x i32> [[X]], i64 3 -; CHECK-NEXT: [[TMP131:%.*]] = extractelement <4 x i32> [[Y]], i64 3 -; CHECK-NEXT: [[TMP132:%.*]] = uitofp i32 [[TMP131]] to float -; CHECK-NEXT: [[TMP133:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP132]]) -; CHECK-NEXT: [[TMP134:%.*]] = fmul fast float [[TMP133]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP135:%.*]] = fptoui float [[TMP134]] to i32 +; CHECK-NEXT: [[TMP34:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] +; CHECK-NEXT: [[TMP35:%.*]] = and i1 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP30]], 1 +; CHECK-NEXT: [[TMP37:%.*]] = sub i32 [[TMP30]], 1 +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP35]], i32 [[TMP36]], i32 [[TMP30]] +; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP34]], i32 [[TMP38]], i32 [[TMP37]] +; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> undef, i32 [[TMP39]], i64 0 +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i32> [[X]], i64 1 +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i32> [[Y]], i64 1 +; CHECK-NEXT: [[TMP43:%.*]] = uitofp i32 [[TMP42]] to float +; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP43]]) +; CHECK-NEXT: [[TMP45:%.*]] = fmul fast float [[TMP44]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP46:%.*]] = fptoui float [[TMP45]] to i32 +; CHECK-NEXT: [[TMP47:%.*]] = zext i32 [[TMP46]] to i64 +; CHECK-NEXT: [[TMP48:%.*]] = zext i32 [[TMP42]] to i64 +; CHECK-NEXT: [[TMP49:%.*]] = mul i64 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = trunc i64 [[TMP49]] to i32 +; CHECK-NEXT: [[TMP51:%.*]] = lshr i64 [[TMP49]], 32 +; 
CHECK-NEXT: [[TMP52:%.*]] = trunc i64 [[TMP51]] to i32 +; CHECK-NEXT: [[TMP53:%.*]] = sub i32 0, [[TMP50]] +; CHECK-NEXT: [[TMP54:%.*]] = icmp eq i32 [[TMP52]], 0 +; CHECK-NEXT: [[TMP55:%.*]] = select i1 [[TMP54]], i32 [[TMP53]], i32 [[TMP50]] +; CHECK-NEXT: [[TMP56:%.*]] = zext i32 [[TMP55]] to i64 +; CHECK-NEXT: [[TMP57:%.*]] = zext i32 [[TMP46]] to i64 +; CHECK-NEXT: [[TMP58:%.*]] = mul i64 [[TMP56]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = trunc i64 [[TMP58]] to i32 +; CHECK-NEXT: [[TMP60:%.*]] = lshr i64 [[TMP58]], 32 +; CHECK-NEXT: [[TMP61:%.*]] = trunc i64 [[TMP60]] to i32 +; CHECK-NEXT: [[TMP62:%.*]] = add i32 [[TMP46]], [[TMP61]] +; CHECK-NEXT: [[TMP63:%.*]] = sub i32 [[TMP46]], [[TMP61]] +; CHECK-NEXT: [[TMP64:%.*]] = select i1 [[TMP54]], i32 [[TMP62]], i32 [[TMP63]] +; CHECK-NEXT: [[TMP65:%.*]] = zext i32 [[TMP64]] to i64 +; CHECK-NEXT: [[TMP66:%.*]] = zext i32 [[TMP41]] to i64 +; CHECK-NEXT: [[TMP67:%.*]] = mul i64 [[TMP65]], [[TMP66]] +; CHECK-NEXT: [[TMP68:%.*]] = trunc i64 [[TMP67]] to i32 +; CHECK-NEXT: [[TMP69:%.*]] = lshr i64 [[TMP67]], 32 +; CHECK-NEXT: [[TMP70:%.*]] = trunc i64 [[TMP69]] to i32 +; CHECK-NEXT: [[TMP71:%.*]] = mul i32 [[TMP70]], [[TMP42]] +; CHECK-NEXT: [[TMP72:%.*]] = sub i32 [[TMP41]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = icmp uge i32 [[TMP72]], [[TMP42]] +; CHECK-NEXT: [[TMP74:%.*]] = icmp uge i32 [[TMP41]], [[TMP71]] +; CHECK-NEXT: [[TMP75:%.*]] = and i1 [[TMP73]], [[TMP74]] +; CHECK-NEXT: [[TMP76:%.*]] = add i32 [[TMP70]], 1 +; CHECK-NEXT: [[TMP77:%.*]] = sub i32 [[TMP70]], 1 +; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP75]], i32 [[TMP76]], i32 [[TMP70]] +; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP74]], i32 [[TMP78]], i32 [[TMP77]] +; CHECK-NEXT: [[TMP80:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP79]], i64 1 +; CHECK-NEXT: [[TMP81:%.*]] = extractelement <4 x i32> [[X]], i64 2 +; CHECK-NEXT: [[TMP82:%.*]] = extractelement <4 x i32> [[Y]], i64 2 +; CHECK-NEXT: [[TMP83:%.*]] = uitofp i32 [[TMP82]] to float +; 
CHECK-NEXT: [[TMP84:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP83]]) +; CHECK-NEXT: [[TMP85:%.*]] = fmul fast float [[TMP84]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP86:%.*]] = fptoui float [[TMP85]] to i32 +; CHECK-NEXT: [[TMP87:%.*]] = zext i32 [[TMP86]] to i64 +; CHECK-NEXT: [[TMP88:%.*]] = zext i32 [[TMP82]] to i64 +; CHECK-NEXT: [[TMP89:%.*]] = mul i64 [[TMP87]], [[TMP88]] +; CHECK-NEXT: [[TMP90:%.*]] = trunc i64 [[TMP89]] to i32 +; CHECK-NEXT: [[TMP91:%.*]] = lshr i64 [[TMP89]], 32 +; CHECK-NEXT: [[TMP92:%.*]] = trunc i64 [[TMP91]] to i32 +; CHECK-NEXT: [[TMP93:%.*]] = sub i32 0, [[TMP90]] +; CHECK-NEXT: [[TMP94:%.*]] = icmp eq i32 [[TMP92]], 0 +; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], i32 [[TMP93]], i32 [[TMP90]] +; CHECK-NEXT: [[TMP96:%.*]] = zext i32 [[TMP95]] to i64 +; CHECK-NEXT: [[TMP97:%.*]] = zext i32 [[TMP86]] to i64 +; CHECK-NEXT: [[TMP98:%.*]] = mul i64 [[TMP96]], [[TMP97]] +; CHECK-NEXT: [[TMP99:%.*]] = trunc i64 [[TMP98]] to i32 +; CHECK-NEXT: [[TMP100:%.*]] = lshr i64 [[TMP98]], 32 +; CHECK-NEXT: [[TMP101:%.*]] = trunc i64 [[TMP100]] to i32 +; CHECK-NEXT: [[TMP102:%.*]] = add i32 [[TMP86]], [[TMP101]] +; CHECK-NEXT: [[TMP103:%.*]] = sub i32 [[TMP86]], [[TMP101]] +; CHECK-NEXT: [[TMP104:%.*]] = select i1 [[TMP94]], i32 [[TMP102]], i32 [[TMP103]] +; CHECK-NEXT: [[TMP105:%.*]] = zext i32 [[TMP104]] to i64 +; CHECK-NEXT: [[TMP106:%.*]] = zext i32 [[TMP81]] to i64 +; CHECK-NEXT: [[TMP107:%.*]] = mul i64 [[TMP105]], [[TMP106]] +; CHECK-NEXT: [[TMP108:%.*]] = trunc i64 [[TMP107]] to i32 +; CHECK-NEXT: [[TMP109:%.*]] = lshr i64 [[TMP107]], 32 +; CHECK-NEXT: [[TMP110:%.*]] = trunc i64 [[TMP109]] to i32 +; CHECK-NEXT: [[TMP111:%.*]] = mul i32 [[TMP110]], [[TMP82]] +; CHECK-NEXT: [[TMP112:%.*]] = sub i32 [[TMP81]], [[TMP111]] +; CHECK-NEXT: [[TMP113:%.*]] = icmp uge i32 [[TMP112]], [[TMP82]] +; CHECK-NEXT: [[TMP114:%.*]] = icmp uge i32 [[TMP81]], [[TMP111]] +; CHECK-NEXT: [[TMP115:%.*]] = and i1 [[TMP113]], [[TMP114]] +; CHECK-NEXT: 
[[TMP116:%.*]] = add i32 [[TMP110]], 1 +; CHECK-NEXT: [[TMP117:%.*]] = sub i32 [[TMP110]], 1 +; CHECK-NEXT: [[TMP118:%.*]] = select i1 [[TMP115]], i32 [[TMP116]], i32 [[TMP110]] +; CHECK-NEXT: [[TMP119:%.*]] = select i1 [[TMP114]], i32 [[TMP118]], i32 [[TMP117]] +; CHECK-NEXT: [[TMP120:%.*]] = insertelement <4 x i32> [[TMP80]], i32 [[TMP119]], i64 2 +; CHECK-NEXT: [[TMP121:%.*]] = extractelement <4 x i32> [[X]], i64 3 +; CHECK-NEXT: [[TMP122:%.*]] = extractelement <4 x i32> [[Y]], i64 3 +; CHECK-NEXT: [[TMP123:%.*]] = uitofp i32 [[TMP122]] to float +; CHECK-NEXT: [[TMP124:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP123]]) +; CHECK-NEXT: [[TMP125:%.*]] = fmul fast float [[TMP124]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP126:%.*]] = fptoui float [[TMP125]] to i32 +; CHECK-NEXT: [[TMP127:%.*]] = zext i32 [[TMP126]] to i64 +; CHECK-NEXT: [[TMP128:%.*]] = zext i32 [[TMP122]] to i64 +; CHECK-NEXT: [[TMP129:%.*]] = mul i64 [[TMP127]], [[TMP128]] +; CHECK-NEXT: [[TMP130:%.*]] = trunc i64 [[TMP129]] to i32 +; CHECK-NEXT: [[TMP131:%.*]] = lshr i64 [[TMP129]], 32 +; CHECK-NEXT: [[TMP132:%.*]] = trunc i64 [[TMP131]] to i32 +; CHECK-NEXT: [[TMP133:%.*]] = sub i32 0, [[TMP130]] +; CHECK-NEXT: [[TMP134:%.*]] = icmp eq i32 [[TMP132]], 0 +; CHECK-NEXT: [[TMP135:%.*]] = select i1 [[TMP134]], i32 [[TMP133]], i32 [[TMP130]] ; CHECK-NEXT: [[TMP136:%.*]] = zext i32 [[TMP135]] to i64 -; CHECK-NEXT: [[TMP137:%.*]] = zext i32 [[TMP131]] to i64 +; CHECK-NEXT: [[TMP137:%.*]] = zext i32 [[TMP126]] to i64 ; CHECK-NEXT: [[TMP138:%.*]] = mul i64 [[TMP136]], [[TMP137]] ; CHECK-NEXT: [[TMP139:%.*]] = trunc i64 [[TMP138]] to i32 ; CHECK-NEXT: [[TMP140:%.*]] = lshr i64 [[TMP138]], 32 ; CHECK-NEXT: [[TMP141:%.*]] = trunc i64 [[TMP140]] to i32 -; CHECK-NEXT: [[TMP142:%.*]] = sub i32 0, [[TMP139]] -; CHECK-NEXT: [[TMP143:%.*]] = icmp eq i32 [[TMP141]], 0 -; CHECK-NEXT: [[TMP144:%.*]] = select i1 [[TMP143]], i32 [[TMP142]], i32 [[TMP139]] +; CHECK-NEXT: [[TMP142:%.*]] = add i32 [[TMP126]], 
[[TMP141]] +; CHECK-NEXT: [[TMP143:%.*]] = sub i32 [[TMP126]], [[TMP141]] +; CHECK-NEXT: [[TMP144:%.*]] = select i1 [[TMP134]], i32 [[TMP142]], i32 [[TMP143]] ; CHECK-NEXT: [[TMP145:%.*]] = zext i32 [[TMP144]] to i64 -; CHECK-NEXT: [[TMP146:%.*]] = zext i32 [[TMP135]] to i64 +; CHECK-NEXT: [[TMP146:%.*]] = zext i32 [[TMP121]] to i64 ; CHECK-NEXT: [[TMP147:%.*]] = mul i64 [[TMP145]], [[TMP146]] ; CHECK-NEXT: [[TMP148:%.*]] = trunc i64 [[TMP147]] to i32 ; CHECK-NEXT: [[TMP149:%.*]] = lshr i64 [[TMP147]], 32 ; CHECK-NEXT: [[TMP150:%.*]] = trunc i64 [[TMP149]] to i32 -; CHECK-NEXT: [[TMP151:%.*]] = add i32 [[TMP135]], [[TMP150]] -; CHECK-NEXT: [[TMP152:%.*]] = sub i32 [[TMP135]], [[TMP150]] -; CHECK-NEXT: [[TMP153:%.*]] = select i1 [[TMP143]], i32 [[TMP151]], i32 [[TMP152]] -; CHECK-NEXT: [[TMP154:%.*]] = zext i32 [[TMP153]] to i64 -; CHECK-NEXT: [[TMP155:%.*]] = zext i32 [[TMP130]] to i64 -; CHECK-NEXT: [[TMP156:%.*]] = mul i64 [[TMP154]], [[TMP155]] -; CHECK-NEXT: [[TMP157:%.*]] = trunc i64 [[TMP156]] to i32 -; CHECK-NEXT: [[TMP158:%.*]] = lshr i64 [[TMP156]], 32 -; CHECK-NEXT: [[TMP159:%.*]] = trunc i64 [[TMP158]] to i32 -; CHECK-NEXT: [[TMP160:%.*]] = mul i32 [[TMP159]], [[TMP131]] -; CHECK-NEXT: [[TMP161:%.*]] = sub i32 [[TMP130]], [[TMP160]] -; CHECK-NEXT: [[TMP162:%.*]] = icmp uge i32 [[TMP161]], [[TMP131]] -; CHECK-NEXT: [[TMP163:%.*]] = select i1 [[TMP162]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP164:%.*]] = icmp uge i32 [[TMP130]], [[TMP160]] -; CHECK-NEXT: [[TMP165:%.*]] = select i1 [[TMP164]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP166:%.*]] = and i32 [[TMP163]], [[TMP165]] -; CHECK-NEXT: [[TMP167:%.*]] = icmp eq i32 [[TMP166]], 0 -; CHECK-NEXT: [[TMP168:%.*]] = add i32 [[TMP159]], 1 -; CHECK-NEXT: [[TMP169:%.*]] = sub i32 [[TMP159]], 1 -; CHECK-NEXT: [[TMP170:%.*]] = select i1 [[TMP167]], i32 [[TMP159]], i32 [[TMP168]] -; CHECK-NEXT: [[TMP171:%.*]] = select i1 [[TMP164]], i32 [[TMP170]], i32 [[TMP169]] -; CHECK-NEXT: [[TMP172:%.*]] = insertelement <4 x i32> 
[[TMP129]], i32 [[TMP171]], i64 3 -; CHECK-NEXT: store <4 x i32> [[TMP172]], <4 x i32> addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP151:%.*]] = mul i32 [[TMP150]], [[TMP122]] +; CHECK-NEXT: [[TMP152:%.*]] = sub i32 [[TMP121]], [[TMP151]] +; CHECK-NEXT: [[TMP153:%.*]] = icmp uge i32 [[TMP152]], [[TMP122]] +; CHECK-NEXT: [[TMP154:%.*]] = icmp uge i32 [[TMP121]], [[TMP151]] +; CHECK-NEXT: [[TMP155:%.*]] = and i1 [[TMP153]], [[TMP154]] +; CHECK-NEXT: [[TMP156:%.*]] = add i32 [[TMP150]], 1 +; CHECK-NEXT: [[TMP157:%.*]] = sub i32 [[TMP150]], 1 +; CHECK-NEXT: [[TMP158:%.*]] = select i1 [[TMP155]], i32 [[TMP156]], i32 [[TMP150]] +; CHECK-NEXT: [[TMP159:%.*]] = select i1 [[TMP154]], i32 [[TMP158]], i32 [[TMP157]] +; CHECK-NEXT: [[TMP160:%.*]] = insertelement <4 x i32> [[TMP120]], i32 [[TMP159]], i64 3 +; CHECK-NEXT: store <4 x i32> [[TMP160]], <4 x i32> addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: udiv_v4i32: @@ -1111,146 +1087,134 @@ define amdgpu_kernel void @urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], [[TMP2]] ; CHECK-NEXT: [[TMP32:%.*]] = sub i32 [[TMP1]], [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = icmp uge i32 [[TMP32]], [[TMP2]] -; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP35:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] -; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP34]], [[TMP36]] -; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[TMP37]], 0 -; CHECK-NEXT: [[TMP39:%.*]] = sub i32 [[TMP32]], [[TMP2]] -; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[TMP32]], [[TMP2]] -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP38]], i32 [[TMP32]], i32 [[TMP39]] -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], i32 [[TMP41]], i32 [[TMP40]] -; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> undef, i32 [[TMP42]], i64 0 -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i32> [[X]], i64 1 -; CHECK-NEXT: [[TMP45:%.*]] = 
extractelement <4 x i32> [[Y]], i64 1 -; CHECK-NEXT: [[TMP46:%.*]] = uitofp i32 [[TMP45]] to float -; CHECK-NEXT: [[TMP47:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP46]]) -; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP47]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP49:%.*]] = fptoui float [[TMP48]] to i32 -; CHECK-NEXT: [[TMP50:%.*]] = zext i32 [[TMP49]] to i64 -; CHECK-NEXT: [[TMP51:%.*]] = zext i32 [[TMP45]] to i64 -; CHECK-NEXT: [[TMP52:%.*]] = mul i64 [[TMP50]], [[TMP51]] -; CHECK-NEXT: [[TMP53:%.*]] = trunc i64 [[TMP52]] to i32 -; CHECK-NEXT: [[TMP54:%.*]] = lshr i64 [[TMP52]], 32 -; CHECK-NEXT: [[TMP55:%.*]] = trunc i64 [[TMP54]] to i32 -; CHECK-NEXT: [[TMP56:%.*]] = sub i32 0, [[TMP53]] -; CHECK-NEXT: [[TMP57:%.*]] = icmp eq i32 [[TMP55]], 0 -; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 [[TMP53]] -; CHECK-NEXT: [[TMP59:%.*]] = zext i32 [[TMP58]] to i64 -; CHECK-NEXT: [[TMP60:%.*]] = zext i32 [[TMP49]] to i64 -; CHECK-NEXT: [[TMP61:%.*]] = mul i64 [[TMP59]], [[TMP60]] -; CHECK-NEXT: [[TMP62:%.*]] = trunc i64 [[TMP61]] to i32 -; CHECK-NEXT: [[TMP63:%.*]] = lshr i64 [[TMP61]], 32 -; CHECK-NEXT: [[TMP64:%.*]] = trunc i64 [[TMP63]] to i32 -; CHECK-NEXT: [[TMP65:%.*]] = add i32 [[TMP49]], [[TMP64]] -; CHECK-NEXT: [[TMP66:%.*]] = sub i32 [[TMP49]], [[TMP64]] -; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP57]], i32 [[TMP65]], i32 [[TMP66]] -; CHECK-NEXT: [[TMP68:%.*]] = zext i32 [[TMP67]] to i64 -; CHECK-NEXT: [[TMP69:%.*]] = zext i32 [[TMP44]] to i64 -; CHECK-NEXT: [[TMP70:%.*]] = mul i64 [[TMP68]], [[TMP69]] -; CHECK-NEXT: [[TMP71:%.*]] = trunc i64 [[TMP70]] to i32 -; CHECK-NEXT: [[TMP72:%.*]] = lshr i64 [[TMP70]], 32 -; CHECK-NEXT: [[TMP73:%.*]] = trunc i64 [[TMP72]] to i32 -; CHECK-NEXT: [[TMP74:%.*]] = mul i32 [[TMP73]], [[TMP45]] -; CHECK-NEXT: [[TMP75:%.*]] = sub i32 [[TMP44]], [[TMP74]] -; CHECK-NEXT: [[TMP76:%.*]] = icmp uge i32 [[TMP75]], [[TMP45]] -; CHECK-NEXT: [[TMP77:%.*]] = select i1 [[TMP76]], i32 -1, i32 0 -; 
CHECK-NEXT: [[TMP78:%.*]] = icmp uge i32 [[TMP44]], [[TMP74]] -; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP80:%.*]] = and i32 [[TMP77]], [[TMP79]] -; CHECK-NEXT: [[TMP81:%.*]] = icmp eq i32 [[TMP80]], 0 -; CHECK-NEXT: [[TMP82:%.*]] = sub i32 [[TMP75]], [[TMP45]] -; CHECK-NEXT: [[TMP83:%.*]] = add i32 [[TMP75]], [[TMP45]] -; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP81]], i32 [[TMP75]], i32 [[TMP82]] -; CHECK-NEXT: [[TMP85:%.*]] = select i1 [[TMP78]], i32 [[TMP84]], i32 [[TMP83]] -; CHECK-NEXT: [[TMP86:%.*]] = insertelement <4 x i32> [[TMP43]], i32 [[TMP85]], i64 1 -; CHECK-NEXT: [[TMP87:%.*]] = extractelement <4 x i32> [[X]], i64 2 -; CHECK-NEXT: [[TMP88:%.*]] = extractelement <4 x i32> [[Y]], i64 2 -; CHECK-NEXT: [[TMP89:%.*]] = uitofp i32 [[TMP88]] to float -; CHECK-NEXT: [[TMP90:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP89]]) -; CHECK-NEXT: [[TMP91:%.*]] = fmul fast float [[TMP90]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP92:%.*]] = fptoui float [[TMP91]] to i32 -; CHECK-NEXT: [[TMP93:%.*]] = zext i32 [[TMP92]] to i64 -; CHECK-NEXT: [[TMP94:%.*]] = zext i32 [[TMP88]] to i64 -; CHECK-NEXT: [[TMP95:%.*]] = mul i64 [[TMP93]], [[TMP94]] -; CHECK-NEXT: [[TMP96:%.*]] = trunc i64 [[TMP95]] to i32 -; CHECK-NEXT: [[TMP97:%.*]] = lshr i64 [[TMP95]], 32 -; CHECK-NEXT: [[TMP98:%.*]] = trunc i64 [[TMP97]] to i32 -; CHECK-NEXT: [[TMP99:%.*]] = sub i32 0, [[TMP96]] -; CHECK-NEXT: [[TMP100:%.*]] = icmp eq i32 [[TMP98]], 0 -; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP100]], i32 [[TMP99]], i32 [[TMP96]] -; CHECK-NEXT: [[TMP102:%.*]] = zext i32 [[TMP101]] to i64 -; CHECK-NEXT: [[TMP103:%.*]] = zext i32 [[TMP92]] to i64 -; CHECK-NEXT: [[TMP104:%.*]] = mul i64 [[TMP102]], [[TMP103]] -; CHECK-NEXT: [[TMP105:%.*]] = trunc i64 [[TMP104]] to i32 -; CHECK-NEXT: [[TMP106:%.*]] = lshr i64 [[TMP104]], 32 -; CHECK-NEXT: [[TMP107:%.*]] = trunc i64 [[TMP106]] to i32 -; CHECK-NEXT: [[TMP108:%.*]] = add i32 [[TMP92]], [[TMP107]] -; 
CHECK-NEXT: [[TMP109:%.*]] = sub i32 [[TMP92]], [[TMP107]] -; CHECK-NEXT: [[TMP110:%.*]] = select i1 [[TMP100]], i32 [[TMP108]], i32 [[TMP109]] -; CHECK-NEXT: [[TMP111:%.*]] = zext i32 [[TMP110]] to i64 -; CHECK-NEXT: [[TMP112:%.*]] = zext i32 [[TMP87]] to i64 -; CHECK-NEXT: [[TMP113:%.*]] = mul i64 [[TMP111]], [[TMP112]] -; CHECK-NEXT: [[TMP114:%.*]] = trunc i64 [[TMP113]] to i32 -; CHECK-NEXT: [[TMP115:%.*]] = lshr i64 [[TMP113]], 32 -; CHECK-NEXT: [[TMP116:%.*]] = trunc i64 [[TMP115]] to i32 -; CHECK-NEXT: [[TMP117:%.*]] = mul i32 [[TMP116]], [[TMP88]] -; CHECK-NEXT: [[TMP118:%.*]] = sub i32 [[TMP87]], [[TMP117]] -; CHECK-NEXT: [[TMP119:%.*]] = icmp uge i32 [[TMP118]], [[TMP88]] -; CHECK-NEXT: [[TMP120:%.*]] = select i1 [[TMP119]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP121:%.*]] = icmp uge i32 [[TMP87]], [[TMP117]] -; CHECK-NEXT: [[TMP122:%.*]] = select i1 [[TMP121]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP123:%.*]] = and i32 [[TMP120]], [[TMP122]] -; CHECK-NEXT: [[TMP124:%.*]] = icmp eq i32 [[TMP123]], 0 -; CHECK-NEXT: [[TMP125:%.*]] = sub i32 [[TMP118]], [[TMP88]] -; CHECK-NEXT: [[TMP126:%.*]] = add i32 [[TMP118]], [[TMP88]] -; CHECK-NEXT: [[TMP127:%.*]] = select i1 [[TMP124]], i32 [[TMP118]], i32 [[TMP125]] -; CHECK-NEXT: [[TMP128:%.*]] = select i1 [[TMP121]], i32 [[TMP127]], i32 [[TMP126]] -; CHECK-NEXT: [[TMP129:%.*]] = insertelement <4 x i32> [[TMP86]], i32 [[TMP128]], i64 2 -; CHECK-NEXT: [[TMP130:%.*]] = extractelement <4 x i32> [[X]], i64 3 -; CHECK-NEXT: [[TMP131:%.*]] = extractelement <4 x i32> [[Y]], i64 3 -; CHECK-NEXT: [[TMP132:%.*]] = uitofp i32 [[TMP131]] to float -; CHECK-NEXT: [[TMP133:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP132]]) -; CHECK-NEXT: [[TMP134:%.*]] = fmul fast float [[TMP133]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP135:%.*]] = fptoui float [[TMP134]] to i32 +; CHECK-NEXT: [[TMP34:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] +; CHECK-NEXT: [[TMP35:%.*]] = and i1 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = sub i32 
[[TMP32]], [[TMP2]] +; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP32]], [[TMP2]] +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP35]], i32 [[TMP36]], i32 [[TMP32]] +; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP34]], i32 [[TMP38]], i32 [[TMP37]] +; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> undef, i32 [[TMP39]], i64 0 +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i32> [[X]], i64 1 +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i32> [[Y]], i64 1 +; CHECK-NEXT: [[TMP43:%.*]] = uitofp i32 [[TMP42]] to float +; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP43]]) +; CHECK-NEXT: [[TMP45:%.*]] = fmul fast float [[TMP44]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP46:%.*]] = fptoui float [[TMP45]] to i32 +; CHECK-NEXT: [[TMP47:%.*]] = zext i32 [[TMP46]] to i64 +; CHECK-NEXT: [[TMP48:%.*]] = zext i32 [[TMP42]] to i64 +; CHECK-NEXT: [[TMP49:%.*]] = mul i64 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = trunc i64 [[TMP49]] to i32 +; CHECK-NEXT: [[TMP51:%.*]] = lshr i64 [[TMP49]], 32 +; CHECK-NEXT: [[TMP52:%.*]] = trunc i64 [[TMP51]] to i32 +; CHECK-NEXT: [[TMP53:%.*]] = sub i32 0, [[TMP50]] +; CHECK-NEXT: [[TMP54:%.*]] = icmp eq i32 [[TMP52]], 0 +; CHECK-NEXT: [[TMP55:%.*]] = select i1 [[TMP54]], i32 [[TMP53]], i32 [[TMP50]] +; CHECK-NEXT: [[TMP56:%.*]] = zext i32 [[TMP55]] to i64 +; CHECK-NEXT: [[TMP57:%.*]] = zext i32 [[TMP46]] to i64 +; CHECK-NEXT: [[TMP58:%.*]] = mul i64 [[TMP56]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = trunc i64 [[TMP58]] to i32 +; CHECK-NEXT: [[TMP60:%.*]] = lshr i64 [[TMP58]], 32 +; CHECK-NEXT: [[TMP61:%.*]] = trunc i64 [[TMP60]] to i32 +; CHECK-NEXT: [[TMP62:%.*]] = add i32 [[TMP46]], [[TMP61]] +; CHECK-NEXT: [[TMP63:%.*]] = sub i32 [[TMP46]], [[TMP61]] +; CHECK-NEXT: [[TMP64:%.*]] = select i1 [[TMP54]], i32 [[TMP62]], i32 [[TMP63]] +; CHECK-NEXT: [[TMP65:%.*]] = zext i32 [[TMP64]] to i64 +; CHECK-NEXT: [[TMP66:%.*]] = zext i32 [[TMP41]] to i64 +; CHECK-NEXT: [[TMP67:%.*]] = mul i64 [[TMP65]], [[TMP66]] 
+; CHECK-NEXT: [[TMP68:%.*]] = trunc i64 [[TMP67]] to i32 +; CHECK-NEXT: [[TMP69:%.*]] = lshr i64 [[TMP67]], 32 +; CHECK-NEXT: [[TMP70:%.*]] = trunc i64 [[TMP69]] to i32 +; CHECK-NEXT: [[TMP71:%.*]] = mul i32 [[TMP70]], [[TMP42]] +; CHECK-NEXT: [[TMP72:%.*]] = sub i32 [[TMP41]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = icmp uge i32 [[TMP72]], [[TMP42]] +; CHECK-NEXT: [[TMP74:%.*]] = icmp uge i32 [[TMP41]], [[TMP71]] +; CHECK-NEXT: [[TMP75:%.*]] = and i1 [[TMP73]], [[TMP74]] +; CHECK-NEXT: [[TMP76:%.*]] = sub i32 [[TMP72]], [[TMP42]] +; CHECK-NEXT: [[TMP77:%.*]] = add i32 [[TMP72]], [[TMP42]] +; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP75]], i32 [[TMP76]], i32 [[TMP72]] +; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP74]], i32 [[TMP78]], i32 [[TMP77]] +; CHECK-NEXT: [[TMP80:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP79]], i64 1 +; CHECK-NEXT: [[TMP81:%.*]] = extractelement <4 x i32> [[X]], i64 2 +; CHECK-NEXT: [[TMP82:%.*]] = extractelement <4 x i32> [[Y]], i64 2 +; CHECK-NEXT: [[TMP83:%.*]] = uitofp i32 [[TMP82]] to float +; CHECK-NEXT: [[TMP84:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP83]]) +; CHECK-NEXT: [[TMP85:%.*]] = fmul fast float [[TMP84]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP86:%.*]] = fptoui float [[TMP85]] to i32 +; CHECK-NEXT: [[TMP87:%.*]] = zext i32 [[TMP86]] to i64 +; CHECK-NEXT: [[TMP88:%.*]] = zext i32 [[TMP82]] to i64 +; CHECK-NEXT: [[TMP89:%.*]] = mul i64 [[TMP87]], [[TMP88]] +; CHECK-NEXT: [[TMP90:%.*]] = trunc i64 [[TMP89]] to i32 +; CHECK-NEXT: [[TMP91:%.*]] = lshr i64 [[TMP89]], 32 +; CHECK-NEXT: [[TMP92:%.*]] = trunc i64 [[TMP91]] to i32 +; CHECK-NEXT: [[TMP93:%.*]] = sub i32 0, [[TMP90]] +; CHECK-NEXT: [[TMP94:%.*]] = icmp eq i32 [[TMP92]], 0 +; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], i32 [[TMP93]], i32 [[TMP90]] +; CHECK-NEXT: [[TMP96:%.*]] = zext i32 [[TMP95]] to i64 +; CHECK-NEXT: [[TMP97:%.*]] = zext i32 [[TMP86]] to i64 +; CHECK-NEXT: [[TMP98:%.*]] = mul i64 [[TMP96]], [[TMP97]] +; CHECK-NEXT: 
[[TMP99:%.*]] = trunc i64 [[TMP98]] to i32 +; CHECK-NEXT: [[TMP100:%.*]] = lshr i64 [[TMP98]], 32 +; CHECK-NEXT: [[TMP101:%.*]] = trunc i64 [[TMP100]] to i32 +; CHECK-NEXT: [[TMP102:%.*]] = add i32 [[TMP86]], [[TMP101]] +; CHECK-NEXT: [[TMP103:%.*]] = sub i32 [[TMP86]], [[TMP101]] +; CHECK-NEXT: [[TMP104:%.*]] = select i1 [[TMP94]], i32 [[TMP102]], i32 [[TMP103]] +; CHECK-NEXT: [[TMP105:%.*]] = zext i32 [[TMP104]] to i64 +; CHECK-NEXT: [[TMP106:%.*]] = zext i32 [[TMP81]] to i64 +; CHECK-NEXT: [[TMP107:%.*]] = mul i64 [[TMP105]], [[TMP106]] +; CHECK-NEXT: [[TMP108:%.*]] = trunc i64 [[TMP107]] to i32 +; CHECK-NEXT: [[TMP109:%.*]] = lshr i64 [[TMP107]], 32 +; CHECK-NEXT: [[TMP110:%.*]] = trunc i64 [[TMP109]] to i32 +; CHECK-NEXT: [[TMP111:%.*]] = mul i32 [[TMP110]], [[TMP82]] +; CHECK-NEXT: [[TMP112:%.*]] = sub i32 [[TMP81]], [[TMP111]] +; CHECK-NEXT: [[TMP113:%.*]] = icmp uge i32 [[TMP112]], [[TMP82]] +; CHECK-NEXT: [[TMP114:%.*]] = icmp uge i32 [[TMP81]], [[TMP111]] +; CHECK-NEXT: [[TMP115:%.*]] = and i1 [[TMP113]], [[TMP114]] +; CHECK-NEXT: [[TMP116:%.*]] = sub i32 [[TMP112]], [[TMP82]] +; CHECK-NEXT: [[TMP117:%.*]] = add i32 [[TMP112]], [[TMP82]] +; CHECK-NEXT: [[TMP118:%.*]] = select i1 [[TMP115]], i32 [[TMP116]], i32 [[TMP112]] +; CHECK-NEXT: [[TMP119:%.*]] = select i1 [[TMP114]], i32 [[TMP118]], i32 [[TMP117]] +; CHECK-NEXT: [[TMP120:%.*]] = insertelement <4 x i32> [[TMP80]], i32 [[TMP119]], i64 2 +; CHECK-NEXT: [[TMP121:%.*]] = extractelement <4 x i32> [[X]], i64 3 +; CHECK-NEXT: [[TMP122:%.*]] = extractelement <4 x i32> [[Y]], i64 3 +; CHECK-NEXT: [[TMP123:%.*]] = uitofp i32 [[TMP122]] to float +; CHECK-NEXT: [[TMP124:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP123]]) +; CHECK-NEXT: [[TMP125:%.*]] = fmul fast float [[TMP124]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP126:%.*]] = fptoui float [[TMP125]] to i32 +; CHECK-NEXT: [[TMP127:%.*]] = zext i32 [[TMP126]] to i64 +; CHECK-NEXT: [[TMP128:%.*]] = zext i32 [[TMP122]] to i64 +; CHECK-NEXT: 
[[TMP129:%.*]] = mul i64 [[TMP127]], [[TMP128]] +; CHECK-NEXT: [[TMP130:%.*]] = trunc i64 [[TMP129]] to i32 +; CHECK-NEXT: [[TMP131:%.*]] = lshr i64 [[TMP129]], 32 +; CHECK-NEXT: [[TMP132:%.*]] = trunc i64 [[TMP131]] to i32 +; CHECK-NEXT: [[TMP133:%.*]] = sub i32 0, [[TMP130]] +; CHECK-NEXT: [[TMP134:%.*]] = icmp eq i32 [[TMP132]], 0 +; CHECK-NEXT: [[TMP135:%.*]] = select i1 [[TMP134]], i32 [[TMP133]], i32 [[TMP130]] ; CHECK-NEXT: [[TMP136:%.*]] = zext i32 [[TMP135]] to i64 -; CHECK-NEXT: [[TMP137:%.*]] = zext i32 [[TMP131]] to i64 +; CHECK-NEXT: [[TMP137:%.*]] = zext i32 [[TMP126]] to i64 ; CHECK-NEXT: [[TMP138:%.*]] = mul i64 [[TMP136]], [[TMP137]] ; CHECK-NEXT: [[TMP139:%.*]] = trunc i64 [[TMP138]] to i32 ; CHECK-NEXT: [[TMP140:%.*]] = lshr i64 [[TMP138]], 32 ; CHECK-NEXT: [[TMP141:%.*]] = trunc i64 [[TMP140]] to i32 -; CHECK-NEXT: [[TMP142:%.*]] = sub i32 0, [[TMP139]] -; CHECK-NEXT: [[TMP143:%.*]] = icmp eq i32 [[TMP141]], 0 -; CHECK-NEXT: [[TMP144:%.*]] = select i1 [[TMP143]], i32 [[TMP142]], i32 [[TMP139]] +; CHECK-NEXT: [[TMP142:%.*]] = add i32 [[TMP126]], [[TMP141]] +; CHECK-NEXT: [[TMP143:%.*]] = sub i32 [[TMP126]], [[TMP141]] +; CHECK-NEXT: [[TMP144:%.*]] = select i1 [[TMP134]], i32 [[TMP142]], i32 [[TMP143]] ; CHECK-NEXT: [[TMP145:%.*]] = zext i32 [[TMP144]] to i64 -; CHECK-NEXT: [[TMP146:%.*]] = zext i32 [[TMP135]] to i64 +; CHECK-NEXT: [[TMP146:%.*]] = zext i32 [[TMP121]] to i64 ; CHECK-NEXT: [[TMP147:%.*]] = mul i64 [[TMP145]], [[TMP146]] ; CHECK-NEXT: [[TMP148:%.*]] = trunc i64 [[TMP147]] to i32 ; CHECK-NEXT: [[TMP149:%.*]] = lshr i64 [[TMP147]], 32 ; CHECK-NEXT: [[TMP150:%.*]] = trunc i64 [[TMP149]] to i32 -; CHECK-NEXT: [[TMP151:%.*]] = add i32 [[TMP135]], [[TMP150]] -; CHECK-NEXT: [[TMP152:%.*]] = sub i32 [[TMP135]], [[TMP150]] -; CHECK-NEXT: [[TMP153:%.*]] = select i1 [[TMP143]], i32 [[TMP151]], i32 [[TMP152]] -; CHECK-NEXT: [[TMP154:%.*]] = zext i32 [[TMP153]] to i64 -; CHECK-NEXT: [[TMP155:%.*]] = zext i32 [[TMP130]] to i64 -; CHECK-NEXT: 
[[TMP156:%.*]] = mul i64 [[TMP154]], [[TMP155]] -; CHECK-NEXT: [[TMP157:%.*]] = trunc i64 [[TMP156]] to i32 -; CHECK-NEXT: [[TMP158:%.*]] = lshr i64 [[TMP156]], 32 -; CHECK-NEXT: [[TMP159:%.*]] = trunc i64 [[TMP158]] to i32 -; CHECK-NEXT: [[TMP160:%.*]] = mul i32 [[TMP159]], [[TMP131]] -; CHECK-NEXT: [[TMP161:%.*]] = sub i32 [[TMP130]], [[TMP160]] -; CHECK-NEXT: [[TMP162:%.*]] = icmp uge i32 [[TMP161]], [[TMP131]] -; CHECK-NEXT: [[TMP163:%.*]] = select i1 [[TMP162]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP164:%.*]] = icmp uge i32 [[TMP130]], [[TMP160]] -; CHECK-NEXT: [[TMP165:%.*]] = select i1 [[TMP164]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP166:%.*]] = and i32 [[TMP163]], [[TMP165]] -; CHECK-NEXT: [[TMP167:%.*]] = icmp eq i32 [[TMP166]], 0 -; CHECK-NEXT: [[TMP168:%.*]] = sub i32 [[TMP161]], [[TMP131]] -; CHECK-NEXT: [[TMP169:%.*]] = add i32 [[TMP161]], [[TMP131]] -; CHECK-NEXT: [[TMP170:%.*]] = select i1 [[TMP167]], i32 [[TMP161]], i32 [[TMP168]] -; CHECK-NEXT: [[TMP171:%.*]] = select i1 [[TMP164]], i32 [[TMP170]], i32 [[TMP169]] -; CHECK-NEXT: [[TMP172:%.*]] = insertelement <4 x i32> [[TMP129]], i32 [[TMP171]], i64 3 -; CHECK-NEXT: store <4 x i32> [[TMP172]], <4 x i32> addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP151:%.*]] = mul i32 [[TMP150]], [[TMP122]] +; CHECK-NEXT: [[TMP152:%.*]] = sub i32 [[TMP121]], [[TMP151]] +; CHECK-NEXT: [[TMP153:%.*]] = icmp uge i32 [[TMP152]], [[TMP122]] +; CHECK-NEXT: [[TMP154:%.*]] = icmp uge i32 [[TMP121]], [[TMP151]] +; CHECK-NEXT: [[TMP155:%.*]] = and i1 [[TMP153]], [[TMP154]] +; CHECK-NEXT: [[TMP156:%.*]] = sub i32 [[TMP152]], [[TMP122]] +; CHECK-NEXT: [[TMP157:%.*]] = add i32 [[TMP152]], [[TMP122]] +; CHECK-NEXT: [[TMP158:%.*]] = select i1 [[TMP155]], i32 [[TMP156]], i32 [[TMP152]] +; CHECK-NEXT: [[TMP159:%.*]] = select i1 [[TMP154]], i32 [[TMP158]], i32 [[TMP157]] +; CHECK-NEXT: [[TMP160:%.*]] = insertelement <4 x i32> [[TMP120]], i32 [[TMP159]], i64 3 +; CHECK-NEXT: store <4 x i32> [[TMP160]], <4 x i32> addrspace(1)* [[OUT:%.*]] ; 
CHECK-NEXT: ret void ; ; GCN-LABEL: urem_v4i32: @@ -1402,175 +1366,163 @@ define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; CHECK-NEXT: [[TMP38:%.*]] = mul i32 [[TMP37]], [[TMP9]] ; CHECK-NEXT: [[TMP39:%.*]] = sub i32 [[TMP8]], [[TMP38]] ; CHECK-NEXT: [[TMP40:%.*]] = icmp uge i32 [[TMP39]], [[TMP9]] -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP42:%.*]] = icmp uge i32 [[TMP8]], [[TMP38]] -; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP41]], [[TMP43]] -; CHECK-NEXT: [[TMP45:%.*]] = icmp eq i32 [[TMP44]], 0 -; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP37]], 1 -; CHECK-NEXT: [[TMP47:%.*]] = sub i32 [[TMP37]], 1 -; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP45]], i32 [[TMP37]], i32 [[TMP46]] -; CHECK-NEXT: [[TMP49:%.*]] = select i1 [[TMP42]], i32 [[TMP48]], i32 [[TMP47]] -; CHECK-NEXT: [[TMP50:%.*]] = xor i32 [[TMP49]], [[TMP5]] -; CHECK-NEXT: [[TMP51:%.*]] = sub i32 [[TMP50]], [[TMP5]] -; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i32> undef, i32 [[TMP51]], i64 0 -; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i32> [[X]], i64 1 -; CHECK-NEXT: [[TMP54:%.*]] = extractelement <4 x i32> [[Y]], i64 1 -; CHECK-NEXT: [[TMP55:%.*]] = ashr i32 [[TMP53]], 31 -; CHECK-NEXT: [[TMP56:%.*]] = ashr i32 [[TMP54]], 31 -; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]] -; CHECK-NEXT: [[TMP58:%.*]] = add i32 [[TMP53]], [[TMP55]] -; CHECK-NEXT: [[TMP59:%.*]] = add i32 [[TMP54]], [[TMP56]] -; CHECK-NEXT: [[TMP60:%.*]] = xor i32 [[TMP58]], [[TMP55]] -; CHECK-NEXT: [[TMP61:%.*]] = xor i32 [[TMP59]], [[TMP56]] -; CHECK-NEXT: [[TMP62:%.*]] = uitofp i32 [[TMP61]] to float -; CHECK-NEXT: [[TMP63:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP62]]) -; CHECK-NEXT: [[TMP64:%.*]] = fmul fast float [[TMP63]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP65:%.*]] = fptoui float [[TMP64]] to i32 -; CHECK-NEXT: [[TMP66:%.*]] = zext i32 [[TMP65]] to i64 
-; CHECK-NEXT: [[TMP67:%.*]] = zext i32 [[TMP61]] to i64 -; CHECK-NEXT: [[TMP68:%.*]] = mul i64 [[TMP66]], [[TMP67]] -; CHECK-NEXT: [[TMP69:%.*]] = trunc i64 [[TMP68]] to i32 -; CHECK-NEXT: [[TMP70:%.*]] = lshr i64 [[TMP68]], 32 -; CHECK-NEXT: [[TMP71:%.*]] = trunc i64 [[TMP70]] to i32 -; CHECK-NEXT: [[TMP72:%.*]] = sub i32 0, [[TMP69]] -; CHECK-NEXT: [[TMP73:%.*]] = icmp eq i32 [[TMP71]], 0 -; CHECK-NEXT: [[TMP74:%.*]] = select i1 [[TMP73]], i32 [[TMP72]], i32 [[TMP69]] -; CHECK-NEXT: [[TMP75:%.*]] = zext i32 [[TMP74]] to i64 -; CHECK-NEXT: [[TMP76:%.*]] = zext i32 [[TMP65]] to i64 -; CHECK-NEXT: [[TMP77:%.*]] = mul i64 [[TMP75]], [[TMP76]] -; CHECK-NEXT: [[TMP78:%.*]] = trunc i64 [[TMP77]] to i32 -; CHECK-NEXT: [[TMP79:%.*]] = lshr i64 [[TMP77]], 32 -; CHECK-NEXT: [[TMP80:%.*]] = trunc i64 [[TMP79]] to i32 -; CHECK-NEXT: [[TMP81:%.*]] = add i32 [[TMP65]], [[TMP80]] -; CHECK-NEXT: [[TMP82:%.*]] = sub i32 [[TMP65]], [[TMP80]] -; CHECK-NEXT: [[TMP83:%.*]] = select i1 [[TMP73]], i32 [[TMP81]], i32 [[TMP82]] -; CHECK-NEXT: [[TMP84:%.*]] = zext i32 [[TMP83]] to i64 -; CHECK-NEXT: [[TMP85:%.*]] = zext i32 [[TMP60]] to i64 -; CHECK-NEXT: [[TMP86:%.*]] = mul i64 [[TMP84]], [[TMP85]] -; CHECK-NEXT: [[TMP87:%.*]] = trunc i64 [[TMP86]] to i32 -; CHECK-NEXT: [[TMP88:%.*]] = lshr i64 [[TMP86]], 32 -; CHECK-NEXT: [[TMP89:%.*]] = trunc i64 [[TMP88]] to i32 -; CHECK-NEXT: [[TMP90:%.*]] = mul i32 [[TMP89]], [[TMP61]] -; CHECK-NEXT: [[TMP91:%.*]] = sub i32 [[TMP60]], [[TMP90]] -; CHECK-NEXT: [[TMP92:%.*]] = icmp uge i32 [[TMP91]], [[TMP61]] -; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP94:%.*]] = icmp uge i32 [[TMP60]], [[TMP90]] -; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP96:%.*]] = and i32 [[TMP93]], [[TMP95]] -; CHECK-NEXT: [[TMP97:%.*]] = icmp eq i32 [[TMP96]], 0 -; CHECK-NEXT: [[TMP98:%.*]] = add i32 [[TMP89]], 1 -; CHECK-NEXT: [[TMP99:%.*]] = sub i32 [[TMP89]], 1 -; CHECK-NEXT: [[TMP100:%.*]] = 
select i1 [[TMP97]], i32 [[TMP89]], i32 [[TMP98]] -; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP94]], i32 [[TMP100]], i32 [[TMP99]] -; CHECK-NEXT: [[TMP102:%.*]] = xor i32 [[TMP101]], [[TMP57]] -; CHECK-NEXT: [[TMP103:%.*]] = sub i32 [[TMP102]], [[TMP57]] -; CHECK-NEXT: [[TMP104:%.*]] = insertelement <4 x i32> [[TMP52]], i32 [[TMP103]], i64 1 -; CHECK-NEXT: [[TMP105:%.*]] = extractelement <4 x i32> [[X]], i64 2 -; CHECK-NEXT: [[TMP106:%.*]] = extractelement <4 x i32> [[Y]], i64 2 -; CHECK-NEXT: [[TMP107:%.*]] = ashr i32 [[TMP105]], 31 -; CHECK-NEXT: [[TMP108:%.*]] = ashr i32 [[TMP106]], 31 -; CHECK-NEXT: [[TMP109:%.*]] = xor i32 [[TMP107]], [[TMP108]] -; CHECK-NEXT: [[TMP110:%.*]] = add i32 [[TMP105]], [[TMP107]] -; CHECK-NEXT: [[TMP111:%.*]] = add i32 [[TMP106]], [[TMP108]] -; CHECK-NEXT: [[TMP112:%.*]] = xor i32 [[TMP110]], [[TMP107]] -; CHECK-NEXT: [[TMP113:%.*]] = xor i32 [[TMP111]], [[TMP108]] -; CHECK-NEXT: [[TMP114:%.*]] = uitofp i32 [[TMP113]] to float -; CHECK-NEXT: [[TMP115:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP114]]) -; CHECK-NEXT: [[TMP116:%.*]] = fmul fast float [[TMP115]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP117:%.*]] = fptoui float [[TMP116]] to i32 -; CHECK-NEXT: [[TMP118:%.*]] = zext i32 [[TMP117]] to i64 -; CHECK-NEXT: [[TMP119:%.*]] = zext i32 [[TMP113]] to i64 -; CHECK-NEXT: [[TMP120:%.*]] = mul i64 [[TMP118]], [[TMP119]] -; CHECK-NEXT: [[TMP121:%.*]] = trunc i64 [[TMP120]] to i32 -; CHECK-NEXT: [[TMP122:%.*]] = lshr i64 [[TMP120]], 32 -; CHECK-NEXT: [[TMP123:%.*]] = trunc i64 [[TMP122]] to i32 -; CHECK-NEXT: [[TMP124:%.*]] = sub i32 0, [[TMP121]] -; CHECK-NEXT: [[TMP125:%.*]] = icmp eq i32 [[TMP123]], 0 -; CHECK-NEXT: [[TMP126:%.*]] = select i1 [[TMP125]], i32 [[TMP124]], i32 [[TMP121]] -; CHECK-NEXT: [[TMP127:%.*]] = zext i32 [[TMP126]] to i64 -; CHECK-NEXT: [[TMP128:%.*]] = zext i32 [[TMP117]] to i64 -; CHECK-NEXT: [[TMP129:%.*]] = mul i64 [[TMP127]], [[TMP128]] -; CHECK-NEXT: [[TMP130:%.*]] = trunc i64 [[TMP129]] to i32 
-; CHECK-NEXT: [[TMP131:%.*]] = lshr i64 [[TMP129]], 32 -; CHECK-NEXT: [[TMP132:%.*]] = trunc i64 [[TMP131]] to i32 -; CHECK-NEXT: [[TMP133:%.*]] = add i32 [[TMP117]], [[TMP132]] -; CHECK-NEXT: [[TMP134:%.*]] = sub i32 [[TMP117]], [[TMP132]] -; CHECK-NEXT: [[TMP135:%.*]] = select i1 [[TMP125]], i32 [[TMP133]], i32 [[TMP134]] -; CHECK-NEXT: [[TMP136:%.*]] = zext i32 [[TMP135]] to i64 -; CHECK-NEXT: [[TMP137:%.*]] = zext i32 [[TMP112]] to i64 -; CHECK-NEXT: [[TMP138:%.*]] = mul i64 [[TMP136]], [[TMP137]] -; CHECK-NEXT: [[TMP139:%.*]] = trunc i64 [[TMP138]] to i32 -; CHECK-NEXT: [[TMP140:%.*]] = lshr i64 [[TMP138]], 32 -; CHECK-NEXT: [[TMP141:%.*]] = trunc i64 [[TMP140]] to i32 -; CHECK-NEXT: [[TMP142:%.*]] = mul i32 [[TMP141]], [[TMP113]] -; CHECK-NEXT: [[TMP143:%.*]] = sub i32 [[TMP112]], [[TMP142]] -; CHECK-NEXT: [[TMP144:%.*]] = icmp uge i32 [[TMP143]], [[TMP113]] -; CHECK-NEXT: [[TMP145:%.*]] = select i1 [[TMP144]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP146:%.*]] = icmp uge i32 [[TMP112]], [[TMP142]] -; CHECK-NEXT: [[TMP147:%.*]] = select i1 [[TMP146]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP148:%.*]] = and i32 [[TMP145]], [[TMP147]] -; CHECK-NEXT: [[TMP149:%.*]] = icmp eq i32 [[TMP148]], 0 -; CHECK-NEXT: [[TMP150:%.*]] = add i32 [[TMP141]], 1 -; CHECK-NEXT: [[TMP151:%.*]] = sub i32 [[TMP141]], 1 -; CHECK-NEXT: [[TMP152:%.*]] = select i1 [[TMP149]], i32 [[TMP141]], i32 [[TMP150]] -; CHECK-NEXT: [[TMP153:%.*]] = select i1 [[TMP146]], i32 [[TMP152]], i32 [[TMP151]] -; CHECK-NEXT: [[TMP154:%.*]] = xor i32 [[TMP153]], [[TMP109]] -; CHECK-NEXT: [[TMP155:%.*]] = sub i32 [[TMP154]], [[TMP109]] -; CHECK-NEXT: [[TMP156:%.*]] = insertelement <4 x i32> [[TMP104]], i32 [[TMP155]], i64 2 -; CHECK-NEXT: [[TMP157:%.*]] = extractelement <4 x i32> [[X]], i64 3 -; CHECK-NEXT: [[TMP158:%.*]] = extractelement <4 x i32> [[Y]], i64 3 -; CHECK-NEXT: [[TMP159:%.*]] = ashr i32 [[TMP157]], 31 -; CHECK-NEXT: [[TMP160:%.*]] = ashr i32 [[TMP158]], 31 -; CHECK-NEXT: [[TMP161:%.*]] = xor i32 
[[TMP159]], [[TMP160]] -; CHECK-NEXT: [[TMP162:%.*]] = add i32 [[TMP157]], [[TMP159]] -; CHECK-NEXT: [[TMP163:%.*]] = add i32 [[TMP158]], [[TMP160]] -; CHECK-NEXT: [[TMP164:%.*]] = xor i32 [[TMP162]], [[TMP159]] -; CHECK-NEXT: [[TMP165:%.*]] = xor i32 [[TMP163]], [[TMP160]] -; CHECK-NEXT: [[TMP166:%.*]] = uitofp i32 [[TMP165]] to float -; CHECK-NEXT: [[TMP167:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP166]]) -; CHECK-NEXT: [[TMP168:%.*]] = fmul fast float [[TMP167]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP169:%.*]] = fptoui float [[TMP168]] to i32 +; CHECK-NEXT: [[TMP41:%.*]] = icmp uge i32 [[TMP8]], [[TMP38]] +; CHECK-NEXT: [[TMP42:%.*]] = and i1 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP37]], 1 +; CHECK-NEXT: [[TMP44:%.*]] = sub i32 [[TMP37]], 1 +; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP42]], i32 [[TMP43]], i32 [[TMP37]] +; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP41]], i32 [[TMP45]], i32 [[TMP44]] +; CHECK-NEXT: [[TMP47:%.*]] = xor i32 [[TMP46]], [[TMP5]] +; CHECK-NEXT: [[TMP48:%.*]] = sub i32 [[TMP47]], [[TMP5]] +; CHECK-NEXT: [[TMP49:%.*]] = insertelement <4 x i32> undef, i32 [[TMP48]], i64 0 +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <4 x i32> [[X]], i64 1 +; CHECK-NEXT: [[TMP51:%.*]] = extractelement <4 x i32> [[Y]], i64 1 +; CHECK-NEXT: [[TMP52:%.*]] = ashr i32 [[TMP50]], 31 +; CHECK-NEXT: [[TMP53:%.*]] = ashr i32 [[TMP51]], 31 +; CHECK-NEXT: [[TMP54:%.*]] = xor i32 [[TMP52]], [[TMP53]] +; CHECK-NEXT: [[TMP55:%.*]] = add i32 [[TMP50]], [[TMP52]] +; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[TMP51]], [[TMP53]] +; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP52]] +; CHECK-NEXT: [[TMP58:%.*]] = xor i32 [[TMP56]], [[TMP53]] +; CHECK-NEXT: [[TMP59:%.*]] = uitofp i32 [[TMP58]] to float +; CHECK-NEXT: [[TMP60:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP59]]) +; CHECK-NEXT: [[TMP61:%.*]] = fmul fast float [[TMP60]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP62:%.*]] = fptoui float [[TMP61]] to i32 +; 
CHECK-NEXT: [[TMP63:%.*]] = zext i32 [[TMP62]] to i64 +; CHECK-NEXT: [[TMP64:%.*]] = zext i32 [[TMP58]] to i64 +; CHECK-NEXT: [[TMP65:%.*]] = mul i64 [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP66:%.*]] = trunc i64 [[TMP65]] to i32 +; CHECK-NEXT: [[TMP67:%.*]] = lshr i64 [[TMP65]], 32 +; CHECK-NEXT: [[TMP68:%.*]] = trunc i64 [[TMP67]] to i32 +; CHECK-NEXT: [[TMP69:%.*]] = sub i32 0, [[TMP66]] +; CHECK-NEXT: [[TMP70:%.*]] = icmp eq i32 [[TMP68]], 0 +; CHECK-NEXT: [[TMP71:%.*]] = select i1 [[TMP70]], i32 [[TMP69]], i32 [[TMP66]] +; CHECK-NEXT: [[TMP72:%.*]] = zext i32 [[TMP71]] to i64 +; CHECK-NEXT: [[TMP73:%.*]] = zext i32 [[TMP62]] to i64 +; CHECK-NEXT: [[TMP74:%.*]] = mul i64 [[TMP72]], [[TMP73]] +; CHECK-NEXT: [[TMP75:%.*]] = trunc i64 [[TMP74]] to i32 +; CHECK-NEXT: [[TMP76:%.*]] = lshr i64 [[TMP74]], 32 +; CHECK-NEXT: [[TMP77:%.*]] = trunc i64 [[TMP76]] to i32 +; CHECK-NEXT: [[TMP78:%.*]] = add i32 [[TMP62]], [[TMP77]] +; CHECK-NEXT: [[TMP79:%.*]] = sub i32 [[TMP62]], [[TMP77]] +; CHECK-NEXT: [[TMP80:%.*]] = select i1 [[TMP70]], i32 [[TMP78]], i32 [[TMP79]] +; CHECK-NEXT: [[TMP81:%.*]] = zext i32 [[TMP80]] to i64 +; CHECK-NEXT: [[TMP82:%.*]] = zext i32 [[TMP57]] to i64 +; CHECK-NEXT: [[TMP83:%.*]] = mul i64 [[TMP81]], [[TMP82]] +; CHECK-NEXT: [[TMP84:%.*]] = trunc i64 [[TMP83]] to i32 +; CHECK-NEXT: [[TMP85:%.*]] = lshr i64 [[TMP83]], 32 +; CHECK-NEXT: [[TMP86:%.*]] = trunc i64 [[TMP85]] to i32 +; CHECK-NEXT: [[TMP87:%.*]] = mul i32 [[TMP86]], [[TMP58]] +; CHECK-NEXT: [[TMP88:%.*]] = sub i32 [[TMP57]], [[TMP87]] +; CHECK-NEXT: [[TMP89:%.*]] = icmp uge i32 [[TMP88]], [[TMP58]] +; CHECK-NEXT: [[TMP90:%.*]] = icmp uge i32 [[TMP57]], [[TMP87]] +; CHECK-NEXT: [[TMP91:%.*]] = and i1 [[TMP89]], [[TMP90]] +; CHECK-NEXT: [[TMP92:%.*]] = add i32 [[TMP86]], 1 +; CHECK-NEXT: [[TMP93:%.*]] = sub i32 [[TMP86]], 1 +; CHECK-NEXT: [[TMP94:%.*]] = select i1 [[TMP91]], i32 [[TMP92]], i32 [[TMP86]] +; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP90]], i32 [[TMP94]], i32 [[TMP93]] +; 
CHECK-NEXT: [[TMP96:%.*]] = xor i32 [[TMP95]], [[TMP54]] +; CHECK-NEXT: [[TMP97:%.*]] = sub i32 [[TMP96]], [[TMP54]] +; CHECK-NEXT: [[TMP98:%.*]] = insertelement <4 x i32> [[TMP49]], i32 [[TMP97]], i64 1 +; CHECK-NEXT: [[TMP99:%.*]] = extractelement <4 x i32> [[X]], i64 2 +; CHECK-NEXT: [[TMP100:%.*]] = extractelement <4 x i32> [[Y]], i64 2 +; CHECK-NEXT: [[TMP101:%.*]] = ashr i32 [[TMP99]], 31 +; CHECK-NEXT: [[TMP102:%.*]] = ashr i32 [[TMP100]], 31 +; CHECK-NEXT: [[TMP103:%.*]] = xor i32 [[TMP101]], [[TMP102]] +; CHECK-NEXT: [[TMP104:%.*]] = add i32 [[TMP99]], [[TMP101]] +; CHECK-NEXT: [[TMP105:%.*]] = add i32 [[TMP100]], [[TMP102]] +; CHECK-NEXT: [[TMP106:%.*]] = xor i32 [[TMP104]], [[TMP101]] +; CHECK-NEXT: [[TMP107:%.*]] = xor i32 [[TMP105]], [[TMP102]] +; CHECK-NEXT: [[TMP108:%.*]] = uitofp i32 [[TMP107]] to float +; CHECK-NEXT: [[TMP109:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP108]]) +; CHECK-NEXT: [[TMP110:%.*]] = fmul fast float [[TMP109]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP111:%.*]] = fptoui float [[TMP110]] to i32 +; CHECK-NEXT: [[TMP112:%.*]] = zext i32 [[TMP111]] to i64 +; CHECK-NEXT: [[TMP113:%.*]] = zext i32 [[TMP107]] to i64 +; CHECK-NEXT: [[TMP114:%.*]] = mul i64 [[TMP112]], [[TMP113]] +; CHECK-NEXT: [[TMP115:%.*]] = trunc i64 [[TMP114]] to i32 +; CHECK-NEXT: [[TMP116:%.*]] = lshr i64 [[TMP114]], 32 +; CHECK-NEXT: [[TMP117:%.*]] = trunc i64 [[TMP116]] to i32 +; CHECK-NEXT: [[TMP118:%.*]] = sub i32 0, [[TMP115]] +; CHECK-NEXT: [[TMP119:%.*]] = icmp eq i32 [[TMP117]], 0 +; CHECK-NEXT: [[TMP120:%.*]] = select i1 [[TMP119]], i32 [[TMP118]], i32 [[TMP115]] +; CHECK-NEXT: [[TMP121:%.*]] = zext i32 [[TMP120]] to i64 +; CHECK-NEXT: [[TMP122:%.*]] = zext i32 [[TMP111]] to i64 +; CHECK-NEXT: [[TMP123:%.*]] = mul i64 [[TMP121]], [[TMP122]] +; CHECK-NEXT: [[TMP124:%.*]] = trunc i64 [[TMP123]] to i32 +; CHECK-NEXT: [[TMP125:%.*]] = lshr i64 [[TMP123]], 32 +; CHECK-NEXT: [[TMP126:%.*]] = trunc i64 [[TMP125]] to i32 +; CHECK-NEXT: 
[[TMP127:%.*]] = add i32 [[TMP111]], [[TMP126]] +; CHECK-NEXT: [[TMP128:%.*]] = sub i32 [[TMP111]], [[TMP126]] +; CHECK-NEXT: [[TMP129:%.*]] = select i1 [[TMP119]], i32 [[TMP127]], i32 [[TMP128]] +; CHECK-NEXT: [[TMP130:%.*]] = zext i32 [[TMP129]] to i64 +; CHECK-NEXT: [[TMP131:%.*]] = zext i32 [[TMP106]] to i64 +; CHECK-NEXT: [[TMP132:%.*]] = mul i64 [[TMP130]], [[TMP131]] +; CHECK-NEXT: [[TMP133:%.*]] = trunc i64 [[TMP132]] to i32 +; CHECK-NEXT: [[TMP134:%.*]] = lshr i64 [[TMP132]], 32 +; CHECK-NEXT: [[TMP135:%.*]] = trunc i64 [[TMP134]] to i32 +; CHECK-NEXT: [[TMP136:%.*]] = mul i32 [[TMP135]], [[TMP107]] +; CHECK-NEXT: [[TMP137:%.*]] = sub i32 [[TMP106]], [[TMP136]] +; CHECK-NEXT: [[TMP138:%.*]] = icmp uge i32 [[TMP137]], [[TMP107]] +; CHECK-NEXT: [[TMP139:%.*]] = icmp uge i32 [[TMP106]], [[TMP136]] +; CHECK-NEXT: [[TMP140:%.*]] = and i1 [[TMP138]], [[TMP139]] +; CHECK-NEXT: [[TMP141:%.*]] = add i32 [[TMP135]], 1 +; CHECK-NEXT: [[TMP142:%.*]] = sub i32 [[TMP135]], 1 +; CHECK-NEXT: [[TMP143:%.*]] = select i1 [[TMP140]], i32 [[TMP141]], i32 [[TMP135]] +; CHECK-NEXT: [[TMP144:%.*]] = select i1 [[TMP139]], i32 [[TMP143]], i32 [[TMP142]] +; CHECK-NEXT: [[TMP145:%.*]] = xor i32 [[TMP144]], [[TMP103]] +; CHECK-NEXT: [[TMP146:%.*]] = sub i32 [[TMP145]], [[TMP103]] +; CHECK-NEXT: [[TMP147:%.*]] = insertelement <4 x i32> [[TMP98]], i32 [[TMP146]], i64 2 +; CHECK-NEXT: [[TMP148:%.*]] = extractelement <4 x i32> [[X]], i64 3 +; CHECK-NEXT: [[TMP149:%.*]] = extractelement <4 x i32> [[Y]], i64 3 +; CHECK-NEXT: [[TMP150:%.*]] = ashr i32 [[TMP148]], 31 +; CHECK-NEXT: [[TMP151:%.*]] = ashr i32 [[TMP149]], 31 +; CHECK-NEXT: [[TMP152:%.*]] = xor i32 [[TMP150]], [[TMP151]] +; CHECK-NEXT: [[TMP153:%.*]] = add i32 [[TMP148]], [[TMP150]] +; CHECK-NEXT: [[TMP154:%.*]] = add i32 [[TMP149]], [[TMP151]] +; CHECK-NEXT: [[TMP155:%.*]] = xor i32 [[TMP153]], [[TMP150]] +; CHECK-NEXT: [[TMP156:%.*]] = xor i32 [[TMP154]], [[TMP151]] +; CHECK-NEXT: [[TMP157:%.*]] = uitofp i32 [[TMP156]] to float 
+; CHECK-NEXT: [[TMP158:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP157]]) +; CHECK-NEXT: [[TMP159:%.*]] = fmul fast float [[TMP158]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP160:%.*]] = fptoui float [[TMP159]] to i32 +; CHECK-NEXT: [[TMP161:%.*]] = zext i32 [[TMP160]] to i64 +; CHECK-NEXT: [[TMP162:%.*]] = zext i32 [[TMP156]] to i64 +; CHECK-NEXT: [[TMP163:%.*]] = mul i64 [[TMP161]], [[TMP162]] +; CHECK-NEXT: [[TMP164:%.*]] = trunc i64 [[TMP163]] to i32 +; CHECK-NEXT: [[TMP165:%.*]] = lshr i64 [[TMP163]], 32 +; CHECK-NEXT: [[TMP166:%.*]] = trunc i64 [[TMP165]] to i32 +; CHECK-NEXT: [[TMP167:%.*]] = sub i32 0, [[TMP164]] +; CHECK-NEXT: [[TMP168:%.*]] = icmp eq i32 [[TMP166]], 0 +; CHECK-NEXT: [[TMP169:%.*]] = select i1 [[TMP168]], i32 [[TMP167]], i32 [[TMP164]] ; CHECK-NEXT: [[TMP170:%.*]] = zext i32 [[TMP169]] to i64 -; CHECK-NEXT: [[TMP171:%.*]] = zext i32 [[TMP165]] to i64 +; CHECK-NEXT: [[TMP171:%.*]] = zext i32 [[TMP160]] to i64 ; CHECK-NEXT: [[TMP172:%.*]] = mul i64 [[TMP170]], [[TMP171]] ; CHECK-NEXT: [[TMP173:%.*]] = trunc i64 [[TMP172]] to i32 ; CHECK-NEXT: [[TMP174:%.*]] = lshr i64 [[TMP172]], 32 ; CHECK-NEXT: [[TMP175:%.*]] = trunc i64 [[TMP174]] to i32 -; CHECK-NEXT: [[TMP176:%.*]] = sub i32 0, [[TMP173]] -; CHECK-NEXT: [[TMP177:%.*]] = icmp eq i32 [[TMP175]], 0 -; CHECK-NEXT: [[TMP178:%.*]] = select i1 [[TMP177]], i32 [[TMP176]], i32 [[TMP173]] +; CHECK-NEXT: [[TMP176:%.*]] = add i32 [[TMP160]], [[TMP175]] +; CHECK-NEXT: [[TMP177:%.*]] = sub i32 [[TMP160]], [[TMP175]] +; CHECK-NEXT: [[TMP178:%.*]] = select i1 [[TMP168]], i32 [[TMP176]], i32 [[TMP177]] ; CHECK-NEXT: [[TMP179:%.*]] = zext i32 [[TMP178]] to i64 -; CHECK-NEXT: [[TMP180:%.*]] = zext i32 [[TMP169]] to i64 +; CHECK-NEXT: [[TMP180:%.*]] = zext i32 [[TMP155]] to i64 ; CHECK-NEXT: [[TMP181:%.*]] = mul i64 [[TMP179]], [[TMP180]] ; CHECK-NEXT: [[TMP182:%.*]] = trunc i64 [[TMP181]] to i32 ; CHECK-NEXT: [[TMP183:%.*]] = lshr i64 [[TMP181]], 32 ; CHECK-NEXT: [[TMP184:%.*]] = trunc i64 
[[TMP183]] to i32 -; CHECK-NEXT: [[TMP185:%.*]] = add i32 [[TMP169]], [[TMP184]] -; CHECK-NEXT: [[TMP186:%.*]] = sub i32 [[TMP169]], [[TMP184]] -; CHECK-NEXT: [[TMP187:%.*]] = select i1 [[TMP177]], i32 [[TMP185]], i32 [[TMP186]] -; CHECK-NEXT: [[TMP188:%.*]] = zext i32 [[TMP187]] to i64 -; CHECK-NEXT: [[TMP189:%.*]] = zext i32 [[TMP164]] to i64 -; CHECK-NEXT: [[TMP190:%.*]] = mul i64 [[TMP188]], [[TMP189]] -; CHECK-NEXT: [[TMP191:%.*]] = trunc i64 [[TMP190]] to i32 -; CHECK-NEXT: [[TMP192:%.*]] = lshr i64 [[TMP190]], 32 -; CHECK-NEXT: [[TMP193:%.*]] = trunc i64 [[TMP192]] to i32 -; CHECK-NEXT: [[TMP194:%.*]] = mul i32 [[TMP193]], [[TMP165]] -; CHECK-NEXT: [[TMP195:%.*]] = sub i32 [[TMP164]], [[TMP194]] -; CHECK-NEXT: [[TMP196:%.*]] = icmp uge i32 [[TMP195]], [[TMP165]] -; CHECK-NEXT: [[TMP197:%.*]] = select i1 [[TMP196]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP198:%.*]] = icmp uge i32 [[TMP164]], [[TMP194]] -; CHECK-NEXT: [[TMP199:%.*]] = select i1 [[TMP198]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP200:%.*]] = and i32 [[TMP197]], [[TMP199]] -; CHECK-NEXT: [[TMP201:%.*]] = icmp eq i32 [[TMP200]], 0 -; CHECK-NEXT: [[TMP202:%.*]] = add i32 [[TMP193]], 1 -; CHECK-NEXT: [[TMP203:%.*]] = sub i32 [[TMP193]], 1 -; CHECK-NEXT: [[TMP204:%.*]] = select i1 [[TMP201]], i32 [[TMP193]], i32 [[TMP202]] -; CHECK-NEXT: [[TMP205:%.*]] = select i1 [[TMP198]], i32 [[TMP204]], i32 [[TMP203]] -; CHECK-NEXT: [[TMP206:%.*]] = xor i32 [[TMP205]], [[TMP161]] -; CHECK-NEXT: [[TMP207:%.*]] = sub i32 [[TMP206]], [[TMP161]] -; CHECK-NEXT: [[TMP208:%.*]] = insertelement <4 x i32> [[TMP156]], i32 [[TMP207]], i64 3 -; CHECK-NEXT: store <4 x i32> [[TMP208]], <4 x i32> addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP185:%.*]] = mul i32 [[TMP184]], [[TMP156]] +; CHECK-NEXT: [[TMP186:%.*]] = sub i32 [[TMP155]], [[TMP185]] +; CHECK-NEXT: [[TMP187:%.*]] = icmp uge i32 [[TMP186]], [[TMP156]] +; CHECK-NEXT: [[TMP188:%.*]] = icmp uge i32 [[TMP155]], [[TMP185]] +; CHECK-NEXT: [[TMP189:%.*]] = and i1 [[TMP187]], 
[[TMP188]] +; CHECK-NEXT: [[TMP190:%.*]] = add i32 [[TMP184]], 1 +; CHECK-NEXT: [[TMP191:%.*]] = sub i32 [[TMP184]], 1 +; CHECK-NEXT: [[TMP192:%.*]] = select i1 [[TMP189]], i32 [[TMP190]], i32 [[TMP184]] +; CHECK-NEXT: [[TMP193:%.*]] = select i1 [[TMP188]], i32 [[TMP192]], i32 [[TMP191]] +; CHECK-NEXT: [[TMP194:%.*]] = xor i32 [[TMP193]], [[TMP152]] +; CHECK-NEXT: [[TMP195:%.*]] = sub i32 [[TMP194]], [[TMP152]] +; CHECK-NEXT: [[TMP196:%.*]] = insertelement <4 x i32> [[TMP147]], i32 [[TMP195]], i64 3 +; CHECK-NEXT: store <4 x i32> [[TMP196]], <4 x i32> addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: sdiv_v4i32: @@ -1757,172 +1709,160 @@ define amdgpu_kernel void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; CHECK-NEXT: [[TMP37:%.*]] = mul i32 [[TMP36]], [[TMP8]] ; CHECK-NEXT: [[TMP38:%.*]] = sub i32 [[TMP7]], [[TMP37]] ; CHECK-NEXT: [[TMP39:%.*]] = icmp uge i32 [[TMP38]], [[TMP8]] -; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP41:%.*]] = icmp uge i32 [[TMP7]], [[TMP37]] -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP40]], [[TMP42]] -; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], 0 -; CHECK-NEXT: [[TMP45:%.*]] = sub i32 [[TMP38]], [[TMP8]] -; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP38]], [[TMP8]] -; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP44]], i32 [[TMP38]], i32 [[TMP45]] -; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP41]], i32 [[TMP47]], i32 [[TMP46]] -; CHECK-NEXT: [[TMP49:%.*]] = xor i32 [[TMP48]], [[TMP3]] -; CHECK-NEXT: [[TMP50:%.*]] = sub i32 [[TMP49]], [[TMP3]] -; CHECK-NEXT: [[TMP51:%.*]] = insertelement <4 x i32> undef, i32 [[TMP50]], i64 0 -; CHECK-NEXT: [[TMP52:%.*]] = extractelement <4 x i32> [[X]], i64 1 -; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i32> [[Y]], i64 1 -; CHECK-NEXT: [[TMP54:%.*]] = ashr i32 [[TMP52]], 31 -; CHECK-NEXT: [[TMP55:%.*]] = ashr i32 [[TMP53]], 31 -; CHECK-NEXT: [[TMP56:%.*]] = add i32 
[[TMP52]], [[TMP54]] -; CHECK-NEXT: [[TMP57:%.*]] = add i32 [[TMP53]], [[TMP55]] -; CHECK-NEXT: [[TMP58:%.*]] = xor i32 [[TMP56]], [[TMP54]] -; CHECK-NEXT: [[TMP59:%.*]] = xor i32 [[TMP57]], [[TMP55]] -; CHECK-NEXT: [[TMP60:%.*]] = uitofp i32 [[TMP59]] to float -; CHECK-NEXT: [[TMP61:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP60]]) -; CHECK-NEXT: [[TMP62:%.*]] = fmul fast float [[TMP61]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP63:%.*]] = fptoui float [[TMP62]] to i32 -; CHECK-NEXT: [[TMP64:%.*]] = zext i32 [[TMP63]] to i64 -; CHECK-NEXT: [[TMP65:%.*]] = zext i32 [[TMP59]] to i64 -; CHECK-NEXT: [[TMP66:%.*]] = mul i64 [[TMP64]], [[TMP65]] -; CHECK-NEXT: [[TMP67:%.*]] = trunc i64 [[TMP66]] to i32 -; CHECK-NEXT: [[TMP68:%.*]] = lshr i64 [[TMP66]], 32 -; CHECK-NEXT: [[TMP69:%.*]] = trunc i64 [[TMP68]] to i32 -; CHECK-NEXT: [[TMP70:%.*]] = sub i32 0, [[TMP67]] -; CHECK-NEXT: [[TMP71:%.*]] = icmp eq i32 [[TMP69]], 0 -; CHECK-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[TMP70]], i32 [[TMP67]] -; CHECK-NEXT: [[TMP73:%.*]] = zext i32 [[TMP72]] to i64 -; CHECK-NEXT: [[TMP74:%.*]] = zext i32 [[TMP63]] to i64 -; CHECK-NEXT: [[TMP75:%.*]] = mul i64 [[TMP73]], [[TMP74]] -; CHECK-NEXT: [[TMP76:%.*]] = trunc i64 [[TMP75]] to i32 -; CHECK-NEXT: [[TMP77:%.*]] = lshr i64 [[TMP75]], 32 -; CHECK-NEXT: [[TMP78:%.*]] = trunc i64 [[TMP77]] to i32 -; CHECK-NEXT: [[TMP79:%.*]] = add i32 [[TMP63]], [[TMP78]] -; CHECK-NEXT: [[TMP80:%.*]] = sub i32 [[TMP63]], [[TMP78]] -; CHECK-NEXT: [[TMP81:%.*]] = select i1 [[TMP71]], i32 [[TMP79]], i32 [[TMP80]] -; CHECK-NEXT: [[TMP82:%.*]] = zext i32 [[TMP81]] to i64 -; CHECK-NEXT: [[TMP83:%.*]] = zext i32 [[TMP58]] to i64 -; CHECK-NEXT: [[TMP84:%.*]] = mul i64 [[TMP82]], [[TMP83]] -; CHECK-NEXT: [[TMP85:%.*]] = trunc i64 [[TMP84]] to i32 -; CHECK-NEXT: [[TMP86:%.*]] = lshr i64 [[TMP84]], 32 -; CHECK-NEXT: [[TMP87:%.*]] = trunc i64 [[TMP86]] to i32 -; CHECK-NEXT: [[TMP88:%.*]] = mul i32 [[TMP87]], [[TMP59]] -; CHECK-NEXT: [[TMP89:%.*]] = 
sub i32 [[TMP58]], [[TMP88]] -; CHECK-NEXT: [[TMP90:%.*]] = icmp uge i32 [[TMP89]], [[TMP59]] -; CHECK-NEXT: [[TMP91:%.*]] = select i1 [[TMP90]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP92:%.*]] = icmp uge i32 [[TMP58]], [[TMP88]] -; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP94:%.*]] = and i32 [[TMP91]], [[TMP93]] -; CHECK-NEXT: [[TMP95:%.*]] = icmp eq i32 [[TMP94]], 0 -; CHECK-NEXT: [[TMP96:%.*]] = sub i32 [[TMP89]], [[TMP59]] -; CHECK-NEXT: [[TMP97:%.*]] = add i32 [[TMP89]], [[TMP59]] -; CHECK-NEXT: [[TMP98:%.*]] = select i1 [[TMP95]], i32 [[TMP89]], i32 [[TMP96]] -; CHECK-NEXT: [[TMP99:%.*]] = select i1 [[TMP92]], i32 [[TMP98]], i32 [[TMP97]] -; CHECK-NEXT: [[TMP100:%.*]] = xor i32 [[TMP99]], [[TMP54]] -; CHECK-NEXT: [[TMP101:%.*]] = sub i32 [[TMP100]], [[TMP54]] -; CHECK-NEXT: [[TMP102:%.*]] = insertelement <4 x i32> [[TMP51]], i32 [[TMP101]], i64 1 -; CHECK-NEXT: [[TMP103:%.*]] = extractelement <4 x i32> [[X]], i64 2 -; CHECK-NEXT: [[TMP104:%.*]] = extractelement <4 x i32> [[Y]], i64 2 -; CHECK-NEXT: [[TMP105:%.*]] = ashr i32 [[TMP103]], 31 -; CHECK-NEXT: [[TMP106:%.*]] = ashr i32 [[TMP104]], 31 -; CHECK-NEXT: [[TMP107:%.*]] = add i32 [[TMP103]], [[TMP105]] -; CHECK-NEXT: [[TMP108:%.*]] = add i32 [[TMP104]], [[TMP106]] -; CHECK-NEXT: [[TMP109:%.*]] = xor i32 [[TMP107]], [[TMP105]] -; CHECK-NEXT: [[TMP110:%.*]] = xor i32 [[TMP108]], [[TMP106]] -; CHECK-NEXT: [[TMP111:%.*]] = uitofp i32 [[TMP110]] to float -; CHECK-NEXT: [[TMP112:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP111]]) -; CHECK-NEXT: [[TMP113:%.*]] = fmul fast float [[TMP112]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP114:%.*]] = fptoui float [[TMP113]] to i32 -; CHECK-NEXT: [[TMP115:%.*]] = zext i32 [[TMP114]] to i64 -; CHECK-NEXT: [[TMP116:%.*]] = zext i32 [[TMP110]] to i64 -; CHECK-NEXT: [[TMP117:%.*]] = mul i64 [[TMP115]], [[TMP116]] -; CHECK-NEXT: [[TMP118:%.*]] = trunc i64 [[TMP117]] to i32 -; CHECK-NEXT: [[TMP119:%.*]] = lshr i64 [[TMP117]], 32 -; 
CHECK-NEXT: [[TMP120:%.*]] = trunc i64 [[TMP119]] to i32 -; CHECK-NEXT: [[TMP121:%.*]] = sub i32 0, [[TMP118]] -; CHECK-NEXT: [[TMP122:%.*]] = icmp eq i32 [[TMP120]], 0 -; CHECK-NEXT: [[TMP123:%.*]] = select i1 [[TMP122]], i32 [[TMP121]], i32 [[TMP118]] -; CHECK-NEXT: [[TMP124:%.*]] = zext i32 [[TMP123]] to i64 -; CHECK-NEXT: [[TMP125:%.*]] = zext i32 [[TMP114]] to i64 -; CHECK-NEXT: [[TMP126:%.*]] = mul i64 [[TMP124]], [[TMP125]] -; CHECK-NEXT: [[TMP127:%.*]] = trunc i64 [[TMP126]] to i32 -; CHECK-NEXT: [[TMP128:%.*]] = lshr i64 [[TMP126]], 32 -; CHECK-NEXT: [[TMP129:%.*]] = trunc i64 [[TMP128]] to i32 -; CHECK-NEXT: [[TMP130:%.*]] = add i32 [[TMP114]], [[TMP129]] -; CHECK-NEXT: [[TMP131:%.*]] = sub i32 [[TMP114]], [[TMP129]] -; CHECK-NEXT: [[TMP132:%.*]] = select i1 [[TMP122]], i32 [[TMP130]], i32 [[TMP131]] -; CHECK-NEXT: [[TMP133:%.*]] = zext i32 [[TMP132]] to i64 -; CHECK-NEXT: [[TMP134:%.*]] = zext i32 [[TMP109]] to i64 -; CHECK-NEXT: [[TMP135:%.*]] = mul i64 [[TMP133]], [[TMP134]] -; CHECK-NEXT: [[TMP136:%.*]] = trunc i64 [[TMP135]] to i32 -; CHECK-NEXT: [[TMP137:%.*]] = lshr i64 [[TMP135]], 32 -; CHECK-NEXT: [[TMP138:%.*]] = trunc i64 [[TMP137]] to i32 -; CHECK-NEXT: [[TMP139:%.*]] = mul i32 [[TMP138]], [[TMP110]] -; CHECK-NEXT: [[TMP140:%.*]] = sub i32 [[TMP109]], [[TMP139]] -; CHECK-NEXT: [[TMP141:%.*]] = icmp uge i32 [[TMP140]], [[TMP110]] -; CHECK-NEXT: [[TMP142:%.*]] = select i1 [[TMP141]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP143:%.*]] = icmp uge i32 [[TMP109]], [[TMP139]] -; CHECK-NEXT: [[TMP144:%.*]] = select i1 [[TMP143]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP145:%.*]] = and i32 [[TMP142]], [[TMP144]] -; CHECK-NEXT: [[TMP146:%.*]] = icmp eq i32 [[TMP145]], 0 -; CHECK-NEXT: [[TMP147:%.*]] = sub i32 [[TMP140]], [[TMP110]] -; CHECK-NEXT: [[TMP148:%.*]] = add i32 [[TMP140]], [[TMP110]] -; CHECK-NEXT: [[TMP149:%.*]] = select i1 [[TMP146]], i32 [[TMP140]], i32 [[TMP147]] -; CHECK-NEXT: [[TMP150:%.*]] = select i1 [[TMP143]], i32 [[TMP149]], i32 [[TMP148]] -; 
CHECK-NEXT: [[TMP151:%.*]] = xor i32 [[TMP150]], [[TMP105]] -; CHECK-NEXT: [[TMP152:%.*]] = sub i32 [[TMP151]], [[TMP105]] -; CHECK-NEXT: [[TMP153:%.*]] = insertelement <4 x i32> [[TMP102]], i32 [[TMP152]], i64 2 -; CHECK-NEXT: [[TMP154:%.*]] = extractelement <4 x i32> [[X]], i64 3 -; CHECK-NEXT: [[TMP155:%.*]] = extractelement <4 x i32> [[Y]], i64 3 -; CHECK-NEXT: [[TMP156:%.*]] = ashr i32 [[TMP154]], 31 -; CHECK-NEXT: [[TMP157:%.*]] = ashr i32 [[TMP155]], 31 -; CHECK-NEXT: [[TMP158:%.*]] = add i32 [[TMP154]], [[TMP156]] -; CHECK-NEXT: [[TMP159:%.*]] = add i32 [[TMP155]], [[TMP157]] -; CHECK-NEXT: [[TMP160:%.*]] = xor i32 [[TMP158]], [[TMP156]] -; CHECK-NEXT: [[TMP161:%.*]] = xor i32 [[TMP159]], [[TMP157]] -; CHECK-NEXT: [[TMP162:%.*]] = uitofp i32 [[TMP161]] to float -; CHECK-NEXT: [[TMP163:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP162]]) -; CHECK-NEXT: [[TMP164:%.*]] = fmul fast float [[TMP163]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP165:%.*]] = fptoui float [[TMP164]] to i32 +; CHECK-NEXT: [[TMP40:%.*]] = icmp uge i32 [[TMP7]], [[TMP37]] +; CHECK-NEXT: [[TMP41:%.*]] = and i1 [[TMP39]], [[TMP40]] +; CHECK-NEXT: [[TMP42:%.*]] = sub i32 [[TMP38]], [[TMP8]] +; CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP38]], [[TMP8]] +; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP41]], i32 [[TMP42]], i32 [[TMP38]] +; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP40]], i32 [[TMP44]], i32 [[TMP43]] +; CHECK-NEXT: [[TMP46:%.*]] = xor i32 [[TMP45]], [[TMP3]] +; CHECK-NEXT: [[TMP47:%.*]] = sub i32 [[TMP46]], [[TMP3]] +; CHECK-NEXT: [[TMP48:%.*]] = insertelement <4 x i32> undef, i32 [[TMP47]], i64 0 +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <4 x i32> [[X]], i64 1 +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <4 x i32> [[Y]], i64 1 +; CHECK-NEXT: [[TMP51:%.*]] = ashr i32 [[TMP49]], 31 +; CHECK-NEXT: [[TMP52:%.*]] = ashr i32 [[TMP50]], 31 +; CHECK-NEXT: [[TMP53:%.*]] = add i32 [[TMP49]], [[TMP51]] +; CHECK-NEXT: [[TMP54:%.*]] = add i32 [[TMP50]], [[TMP52]] +; CHECK-NEXT: 
[[TMP55:%.*]] = xor i32 [[TMP53]], [[TMP51]] +; CHECK-NEXT: [[TMP56:%.*]] = xor i32 [[TMP54]], [[TMP52]] +; CHECK-NEXT: [[TMP57:%.*]] = uitofp i32 [[TMP56]] to float +; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP57]]) +; CHECK-NEXT: [[TMP59:%.*]] = fmul fast float [[TMP58]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP60:%.*]] = fptoui float [[TMP59]] to i32 +; CHECK-NEXT: [[TMP61:%.*]] = zext i32 [[TMP60]] to i64 +; CHECK-NEXT: [[TMP62:%.*]] = zext i32 [[TMP56]] to i64 +; CHECK-NEXT: [[TMP63:%.*]] = mul i64 [[TMP61]], [[TMP62]] +; CHECK-NEXT: [[TMP64:%.*]] = trunc i64 [[TMP63]] to i32 +; CHECK-NEXT: [[TMP65:%.*]] = lshr i64 [[TMP63]], 32 +; CHECK-NEXT: [[TMP66:%.*]] = trunc i64 [[TMP65]] to i32 +; CHECK-NEXT: [[TMP67:%.*]] = sub i32 0, [[TMP64]] +; CHECK-NEXT: [[TMP68:%.*]] = icmp eq i32 [[TMP66]], 0 +; CHECK-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], i32 [[TMP67]], i32 [[TMP64]] +; CHECK-NEXT: [[TMP70:%.*]] = zext i32 [[TMP69]] to i64 +; CHECK-NEXT: [[TMP71:%.*]] = zext i32 [[TMP60]] to i64 +; CHECK-NEXT: [[TMP72:%.*]] = mul i64 [[TMP70]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = trunc i64 [[TMP72]] to i32 +; CHECK-NEXT: [[TMP74:%.*]] = lshr i64 [[TMP72]], 32 +; CHECK-NEXT: [[TMP75:%.*]] = trunc i64 [[TMP74]] to i32 +; CHECK-NEXT: [[TMP76:%.*]] = add i32 [[TMP60]], [[TMP75]] +; CHECK-NEXT: [[TMP77:%.*]] = sub i32 [[TMP60]], [[TMP75]] +; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP68]], i32 [[TMP76]], i32 [[TMP77]] +; CHECK-NEXT: [[TMP79:%.*]] = zext i32 [[TMP78]] to i64 +; CHECK-NEXT: [[TMP80:%.*]] = zext i32 [[TMP55]] to i64 +; CHECK-NEXT: [[TMP81:%.*]] = mul i64 [[TMP79]], [[TMP80]] +; CHECK-NEXT: [[TMP82:%.*]] = trunc i64 [[TMP81]] to i32 +; CHECK-NEXT: [[TMP83:%.*]] = lshr i64 [[TMP81]], 32 +; CHECK-NEXT: [[TMP84:%.*]] = trunc i64 [[TMP83]] to i32 +; CHECK-NEXT: [[TMP85:%.*]] = mul i32 [[TMP84]], [[TMP56]] +; CHECK-NEXT: [[TMP86:%.*]] = sub i32 [[TMP55]], [[TMP85]] +; CHECK-NEXT: [[TMP87:%.*]] = icmp uge i32 [[TMP86]], [[TMP56]] +; 
CHECK-NEXT: [[TMP88:%.*]] = icmp uge i32 [[TMP55]], [[TMP85]] +; CHECK-NEXT: [[TMP89:%.*]] = and i1 [[TMP87]], [[TMP88]] +; CHECK-NEXT: [[TMP90:%.*]] = sub i32 [[TMP86]], [[TMP56]] +; CHECK-NEXT: [[TMP91:%.*]] = add i32 [[TMP86]], [[TMP56]] +; CHECK-NEXT: [[TMP92:%.*]] = select i1 [[TMP89]], i32 [[TMP90]], i32 [[TMP86]] +; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP88]], i32 [[TMP92]], i32 [[TMP91]] +; CHECK-NEXT: [[TMP94:%.*]] = xor i32 [[TMP93]], [[TMP51]] +; CHECK-NEXT: [[TMP95:%.*]] = sub i32 [[TMP94]], [[TMP51]] +; CHECK-NEXT: [[TMP96:%.*]] = insertelement <4 x i32> [[TMP48]], i32 [[TMP95]], i64 1 +; CHECK-NEXT: [[TMP97:%.*]] = extractelement <4 x i32> [[X]], i64 2 +; CHECK-NEXT: [[TMP98:%.*]] = extractelement <4 x i32> [[Y]], i64 2 +; CHECK-NEXT: [[TMP99:%.*]] = ashr i32 [[TMP97]], 31 +; CHECK-NEXT: [[TMP100:%.*]] = ashr i32 [[TMP98]], 31 +; CHECK-NEXT: [[TMP101:%.*]] = add i32 [[TMP97]], [[TMP99]] +; CHECK-NEXT: [[TMP102:%.*]] = add i32 [[TMP98]], [[TMP100]] +; CHECK-NEXT: [[TMP103:%.*]] = xor i32 [[TMP101]], [[TMP99]] +; CHECK-NEXT: [[TMP104:%.*]] = xor i32 [[TMP102]], [[TMP100]] +; CHECK-NEXT: [[TMP105:%.*]] = uitofp i32 [[TMP104]] to float +; CHECK-NEXT: [[TMP106:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP105]]) +; CHECK-NEXT: [[TMP107:%.*]] = fmul fast float [[TMP106]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP108:%.*]] = fptoui float [[TMP107]] to i32 +; CHECK-NEXT: [[TMP109:%.*]] = zext i32 [[TMP108]] to i64 +; CHECK-NEXT: [[TMP110:%.*]] = zext i32 [[TMP104]] to i64 +; CHECK-NEXT: [[TMP111:%.*]] = mul i64 [[TMP109]], [[TMP110]] +; CHECK-NEXT: [[TMP112:%.*]] = trunc i64 [[TMP111]] to i32 +; CHECK-NEXT: [[TMP113:%.*]] = lshr i64 [[TMP111]], 32 +; CHECK-NEXT: [[TMP114:%.*]] = trunc i64 [[TMP113]] to i32 +; CHECK-NEXT: [[TMP115:%.*]] = sub i32 0, [[TMP112]] +; CHECK-NEXT: [[TMP116:%.*]] = icmp eq i32 [[TMP114]], 0 +; CHECK-NEXT: [[TMP117:%.*]] = select i1 [[TMP116]], i32 [[TMP115]], i32 [[TMP112]] +; CHECK-NEXT: [[TMP118:%.*]] = zext i32 
[[TMP117]] to i64 +; CHECK-NEXT: [[TMP119:%.*]] = zext i32 [[TMP108]] to i64 +; CHECK-NEXT: [[TMP120:%.*]] = mul i64 [[TMP118]], [[TMP119]] +; CHECK-NEXT: [[TMP121:%.*]] = trunc i64 [[TMP120]] to i32 +; CHECK-NEXT: [[TMP122:%.*]] = lshr i64 [[TMP120]], 32 +; CHECK-NEXT: [[TMP123:%.*]] = trunc i64 [[TMP122]] to i32 +; CHECK-NEXT: [[TMP124:%.*]] = add i32 [[TMP108]], [[TMP123]] +; CHECK-NEXT: [[TMP125:%.*]] = sub i32 [[TMP108]], [[TMP123]] +; CHECK-NEXT: [[TMP126:%.*]] = select i1 [[TMP116]], i32 [[TMP124]], i32 [[TMP125]] +; CHECK-NEXT: [[TMP127:%.*]] = zext i32 [[TMP126]] to i64 +; CHECK-NEXT: [[TMP128:%.*]] = zext i32 [[TMP103]] to i64 +; CHECK-NEXT: [[TMP129:%.*]] = mul i64 [[TMP127]], [[TMP128]] +; CHECK-NEXT: [[TMP130:%.*]] = trunc i64 [[TMP129]] to i32 +; CHECK-NEXT: [[TMP131:%.*]] = lshr i64 [[TMP129]], 32 +; CHECK-NEXT: [[TMP132:%.*]] = trunc i64 [[TMP131]] to i32 +; CHECK-NEXT: [[TMP133:%.*]] = mul i32 [[TMP132]], [[TMP104]] +; CHECK-NEXT: [[TMP134:%.*]] = sub i32 [[TMP103]], [[TMP133]] +; CHECK-NEXT: [[TMP135:%.*]] = icmp uge i32 [[TMP134]], [[TMP104]] +; CHECK-NEXT: [[TMP136:%.*]] = icmp uge i32 [[TMP103]], [[TMP133]] +; CHECK-NEXT: [[TMP137:%.*]] = and i1 [[TMP135]], [[TMP136]] +; CHECK-NEXT: [[TMP138:%.*]] = sub i32 [[TMP134]], [[TMP104]] +; CHECK-NEXT: [[TMP139:%.*]] = add i32 [[TMP134]], [[TMP104]] +; CHECK-NEXT: [[TMP140:%.*]] = select i1 [[TMP137]], i32 [[TMP138]], i32 [[TMP134]] +; CHECK-NEXT: [[TMP141:%.*]] = select i1 [[TMP136]], i32 [[TMP140]], i32 [[TMP139]] +; CHECK-NEXT: [[TMP142:%.*]] = xor i32 [[TMP141]], [[TMP99]] +; CHECK-NEXT: [[TMP143:%.*]] = sub i32 [[TMP142]], [[TMP99]] +; CHECK-NEXT: [[TMP144:%.*]] = insertelement <4 x i32> [[TMP96]], i32 [[TMP143]], i64 2 +; CHECK-NEXT: [[TMP145:%.*]] = extractelement <4 x i32> [[X]], i64 3 +; CHECK-NEXT: [[TMP146:%.*]] = extractelement <4 x i32> [[Y]], i64 3 +; CHECK-NEXT: [[TMP147:%.*]] = ashr i32 [[TMP145]], 31 +; CHECK-NEXT: [[TMP148:%.*]] = ashr i32 [[TMP146]], 31 +; CHECK-NEXT: [[TMP149:%.*]] 
= add i32 [[TMP145]], [[TMP147]] +; CHECK-NEXT: [[TMP150:%.*]] = add i32 [[TMP146]], [[TMP148]] +; CHECK-NEXT: [[TMP151:%.*]] = xor i32 [[TMP149]], [[TMP147]] +; CHECK-NEXT: [[TMP152:%.*]] = xor i32 [[TMP150]], [[TMP148]] +; CHECK-NEXT: [[TMP153:%.*]] = uitofp i32 [[TMP152]] to float +; CHECK-NEXT: [[TMP154:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP153]]) +; CHECK-NEXT: [[TMP155:%.*]] = fmul fast float [[TMP154]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP156:%.*]] = fptoui float [[TMP155]] to i32 +; CHECK-NEXT: [[TMP157:%.*]] = zext i32 [[TMP156]] to i64 +; CHECK-NEXT: [[TMP158:%.*]] = zext i32 [[TMP152]] to i64 +; CHECK-NEXT: [[TMP159:%.*]] = mul i64 [[TMP157]], [[TMP158]] +; CHECK-NEXT: [[TMP160:%.*]] = trunc i64 [[TMP159]] to i32 +; CHECK-NEXT: [[TMP161:%.*]] = lshr i64 [[TMP159]], 32 +; CHECK-NEXT: [[TMP162:%.*]] = trunc i64 [[TMP161]] to i32 +; CHECK-NEXT: [[TMP163:%.*]] = sub i32 0, [[TMP160]] +; CHECK-NEXT: [[TMP164:%.*]] = icmp eq i32 [[TMP162]], 0 +; CHECK-NEXT: [[TMP165:%.*]] = select i1 [[TMP164]], i32 [[TMP163]], i32 [[TMP160]] ; CHECK-NEXT: [[TMP166:%.*]] = zext i32 [[TMP165]] to i64 -; CHECK-NEXT: [[TMP167:%.*]] = zext i32 [[TMP161]] to i64 +; CHECK-NEXT: [[TMP167:%.*]] = zext i32 [[TMP156]] to i64 ; CHECK-NEXT: [[TMP168:%.*]] = mul i64 [[TMP166]], [[TMP167]] ; CHECK-NEXT: [[TMP169:%.*]] = trunc i64 [[TMP168]] to i32 ; CHECK-NEXT: [[TMP170:%.*]] = lshr i64 [[TMP168]], 32 ; CHECK-NEXT: [[TMP171:%.*]] = trunc i64 [[TMP170]] to i32 -; CHECK-NEXT: [[TMP172:%.*]] = sub i32 0, [[TMP169]] -; CHECK-NEXT: [[TMP173:%.*]] = icmp eq i32 [[TMP171]], 0 -; CHECK-NEXT: [[TMP174:%.*]] = select i1 [[TMP173]], i32 [[TMP172]], i32 [[TMP169]] +; CHECK-NEXT: [[TMP172:%.*]] = add i32 [[TMP156]], [[TMP171]] +; CHECK-NEXT: [[TMP173:%.*]] = sub i32 [[TMP156]], [[TMP171]] +; CHECK-NEXT: [[TMP174:%.*]] = select i1 [[TMP164]], i32 [[TMP172]], i32 [[TMP173]] ; CHECK-NEXT: [[TMP175:%.*]] = zext i32 [[TMP174]] to i64 -; CHECK-NEXT: [[TMP176:%.*]] = zext i32 [[TMP165]] to 
i64 +; CHECK-NEXT: [[TMP176:%.*]] = zext i32 [[TMP151]] to i64 ; CHECK-NEXT: [[TMP177:%.*]] = mul i64 [[TMP175]], [[TMP176]] ; CHECK-NEXT: [[TMP178:%.*]] = trunc i64 [[TMP177]] to i32 ; CHECK-NEXT: [[TMP179:%.*]] = lshr i64 [[TMP177]], 32 ; CHECK-NEXT: [[TMP180:%.*]] = trunc i64 [[TMP179]] to i32 -; CHECK-NEXT: [[TMP181:%.*]] = add i32 [[TMP165]], [[TMP180]] -; CHECK-NEXT: [[TMP182:%.*]] = sub i32 [[TMP165]], [[TMP180]] -; CHECK-NEXT: [[TMP183:%.*]] = select i1 [[TMP173]], i32 [[TMP181]], i32 [[TMP182]] -; CHECK-NEXT: [[TMP184:%.*]] = zext i32 [[TMP183]] to i64 -; CHECK-NEXT: [[TMP185:%.*]] = zext i32 [[TMP160]] to i64 -; CHECK-NEXT: [[TMP186:%.*]] = mul i64 [[TMP184]], [[TMP185]] -; CHECK-NEXT: [[TMP187:%.*]] = trunc i64 [[TMP186]] to i32 -; CHECK-NEXT: [[TMP188:%.*]] = lshr i64 [[TMP186]], 32 -; CHECK-NEXT: [[TMP189:%.*]] = trunc i64 [[TMP188]] to i32 -; CHECK-NEXT: [[TMP190:%.*]] = mul i32 [[TMP189]], [[TMP161]] -; CHECK-NEXT: [[TMP191:%.*]] = sub i32 [[TMP160]], [[TMP190]] -; CHECK-NEXT: [[TMP192:%.*]] = icmp uge i32 [[TMP191]], [[TMP161]] -; CHECK-NEXT: [[TMP193:%.*]] = select i1 [[TMP192]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP194:%.*]] = icmp uge i32 [[TMP160]], [[TMP190]] -; CHECK-NEXT: [[TMP195:%.*]] = select i1 [[TMP194]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP196:%.*]] = and i32 [[TMP193]], [[TMP195]] -; CHECK-NEXT: [[TMP197:%.*]] = icmp eq i32 [[TMP196]], 0 -; CHECK-NEXT: [[TMP198:%.*]] = sub i32 [[TMP191]], [[TMP161]] -; CHECK-NEXT: [[TMP199:%.*]] = add i32 [[TMP191]], [[TMP161]] -; CHECK-NEXT: [[TMP200:%.*]] = select i1 [[TMP197]], i32 [[TMP191]], i32 [[TMP198]] -; CHECK-NEXT: [[TMP201:%.*]] = select i1 [[TMP194]], i32 [[TMP200]], i32 [[TMP199]] -; CHECK-NEXT: [[TMP202:%.*]] = xor i32 [[TMP201]], [[TMP156]] -; CHECK-NEXT: [[TMP203:%.*]] = sub i32 [[TMP202]], [[TMP156]] -; CHECK-NEXT: [[TMP204:%.*]] = insertelement <4 x i32> [[TMP153]], i32 [[TMP203]], i64 3 -; CHECK-NEXT: store <4 x i32> [[TMP204]], <4 x i32> addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: 
[[TMP181:%.*]] = mul i32 [[TMP180]], [[TMP152]] +; CHECK-NEXT: [[TMP182:%.*]] = sub i32 [[TMP151]], [[TMP181]] +; CHECK-NEXT: [[TMP183:%.*]] = icmp uge i32 [[TMP182]], [[TMP152]] +; CHECK-NEXT: [[TMP184:%.*]] = icmp uge i32 [[TMP151]], [[TMP181]] +; CHECK-NEXT: [[TMP185:%.*]] = and i1 [[TMP183]], [[TMP184]] +; CHECK-NEXT: [[TMP186:%.*]] = sub i32 [[TMP182]], [[TMP152]] +; CHECK-NEXT: [[TMP187:%.*]] = add i32 [[TMP182]], [[TMP152]] +; CHECK-NEXT: [[TMP188:%.*]] = select i1 [[TMP185]], i32 [[TMP186]], i32 [[TMP182]] +; CHECK-NEXT: [[TMP189:%.*]] = select i1 [[TMP184]], i32 [[TMP188]], i32 [[TMP187]] +; CHECK-NEXT: [[TMP190:%.*]] = xor i32 [[TMP189]], [[TMP147]] +; CHECK-NEXT: [[TMP191:%.*]] = sub i32 [[TMP190]], [[TMP147]] +; CHECK-NEXT: [[TMP192:%.*]] = insertelement <4 x i32> [[TMP144]], i32 [[TMP191]], i64 3 +; CHECK-NEXT: store <4 x i32> [[TMP192]], <4 x i32> addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: srem_v4i32: @@ -4304,60 +4244,54 @@ define amdgpu_kernel void @udiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], [[TMP2]] ; CHECK-NEXT: [[TMP32:%.*]] = sub i32 [[TMP1]], [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = icmp uge i32 [[TMP32]], [[TMP2]] -; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP35:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] -; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP34]], [[TMP36]] -; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[TMP37]], 0 -; CHECK-NEXT: [[TMP39:%.*]] = add i32 [[TMP30]], 1 -; CHECK-NEXT: [[TMP40:%.*]] = sub i32 [[TMP30]], 1 -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP38]], i32 [[TMP30]], i32 [[TMP39]] -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], i32 [[TMP41]], i32 [[TMP40]] -; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x i32> undef, i32 [[TMP42]], i64 0 -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[X]], i64 1 -; CHECK-NEXT: [[TMP45:%.*]] = 
extractelement <2 x i32> [[SHL_Y]], i64 1 -; CHECK-NEXT: [[TMP46:%.*]] = uitofp i32 [[TMP45]] to float -; CHECK-NEXT: [[TMP47:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP46]]) -; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP47]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP49:%.*]] = fptoui float [[TMP48]] to i32 -; CHECK-NEXT: [[TMP50:%.*]] = zext i32 [[TMP49]] to i64 -; CHECK-NEXT: [[TMP51:%.*]] = zext i32 [[TMP45]] to i64 -; CHECK-NEXT: [[TMP52:%.*]] = mul i64 [[TMP50]], [[TMP51]] -; CHECK-NEXT: [[TMP53:%.*]] = trunc i64 [[TMP52]] to i32 -; CHECK-NEXT: [[TMP54:%.*]] = lshr i64 [[TMP52]], 32 -; CHECK-NEXT: [[TMP55:%.*]] = trunc i64 [[TMP54]] to i32 -; CHECK-NEXT: [[TMP56:%.*]] = sub i32 0, [[TMP53]] -; CHECK-NEXT: [[TMP57:%.*]] = icmp eq i32 [[TMP55]], 0 -; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 [[TMP53]] -; CHECK-NEXT: [[TMP59:%.*]] = zext i32 [[TMP58]] to i64 -; CHECK-NEXT: [[TMP60:%.*]] = zext i32 [[TMP49]] to i64 -; CHECK-NEXT: [[TMP61:%.*]] = mul i64 [[TMP59]], [[TMP60]] -; CHECK-NEXT: [[TMP62:%.*]] = trunc i64 [[TMP61]] to i32 -; CHECK-NEXT: [[TMP63:%.*]] = lshr i64 [[TMP61]], 32 -; CHECK-NEXT: [[TMP64:%.*]] = trunc i64 [[TMP63]] to i32 -; CHECK-NEXT: [[TMP65:%.*]] = add i32 [[TMP49]], [[TMP64]] -; CHECK-NEXT: [[TMP66:%.*]] = sub i32 [[TMP49]], [[TMP64]] -; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP57]], i32 [[TMP65]], i32 [[TMP66]] -; CHECK-NEXT: [[TMP68:%.*]] = zext i32 [[TMP67]] to i64 -; CHECK-NEXT: [[TMP69:%.*]] = zext i32 [[TMP44]] to i64 -; CHECK-NEXT: [[TMP70:%.*]] = mul i64 [[TMP68]], [[TMP69]] -; CHECK-NEXT: [[TMP71:%.*]] = trunc i64 [[TMP70]] to i32 -; CHECK-NEXT: [[TMP72:%.*]] = lshr i64 [[TMP70]], 32 -; CHECK-NEXT: [[TMP73:%.*]] = trunc i64 [[TMP72]] to i32 -; CHECK-NEXT: [[TMP74:%.*]] = mul i32 [[TMP73]], [[TMP45]] -; CHECK-NEXT: [[TMP75:%.*]] = sub i32 [[TMP44]], [[TMP74]] -; CHECK-NEXT: [[TMP76:%.*]] = icmp uge i32 [[TMP75]], [[TMP45]] -; CHECK-NEXT: [[TMP77:%.*]] = select i1 [[TMP76]], i32 -1, i32 0 
-; CHECK-NEXT: [[TMP78:%.*]] = icmp uge i32 [[TMP44]], [[TMP74]] -; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP80:%.*]] = and i32 [[TMP77]], [[TMP79]] -; CHECK-NEXT: [[TMP81:%.*]] = icmp eq i32 [[TMP80]], 0 -; CHECK-NEXT: [[TMP82:%.*]] = add i32 [[TMP73]], 1 -; CHECK-NEXT: [[TMP83:%.*]] = sub i32 [[TMP73]], 1 -; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP81]], i32 [[TMP73]], i32 [[TMP82]] -; CHECK-NEXT: [[TMP85:%.*]] = select i1 [[TMP78]], i32 [[TMP84]], i32 [[TMP83]] -; CHECK-NEXT: [[TMP86:%.*]] = insertelement <2 x i32> [[TMP43]], i32 [[TMP85]], i64 1 -; CHECK-NEXT: store <2 x i32> [[TMP86]], <2 x i32> addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] +; CHECK-NEXT: [[TMP35:%.*]] = and i1 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP30]], 1 +; CHECK-NEXT: [[TMP37:%.*]] = sub i32 [[TMP30]], 1 +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP35]], i32 [[TMP36]], i32 [[TMP30]] +; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP34]], i32 [[TMP38]], i32 [[TMP37]] +; CHECK-NEXT: [[TMP40:%.*]] = insertelement <2 x i32> undef, i32 [[TMP39]], i64 0 +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x i32> [[X]], i64 1 +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 1 +; CHECK-NEXT: [[TMP43:%.*]] = uitofp i32 [[TMP42]] to float +; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP43]]) +; CHECK-NEXT: [[TMP45:%.*]] = fmul fast float [[TMP44]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP46:%.*]] = fptoui float [[TMP45]] to i32 +; CHECK-NEXT: [[TMP47:%.*]] = zext i32 [[TMP46]] to i64 +; CHECK-NEXT: [[TMP48:%.*]] = zext i32 [[TMP42]] to i64 +; CHECK-NEXT: [[TMP49:%.*]] = mul i64 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = trunc i64 [[TMP49]] to i32 +; CHECK-NEXT: [[TMP51:%.*]] = lshr i64 [[TMP49]], 32 +; CHECK-NEXT: [[TMP52:%.*]] = trunc i64 [[TMP51]] to i32 +; CHECK-NEXT: [[TMP53:%.*]] = sub i32 0, [[TMP50]] +; CHECK-NEXT: 
[[TMP54:%.*]] = icmp eq i32 [[TMP52]], 0 +; CHECK-NEXT: [[TMP55:%.*]] = select i1 [[TMP54]], i32 [[TMP53]], i32 [[TMP50]] +; CHECK-NEXT: [[TMP56:%.*]] = zext i32 [[TMP55]] to i64 +; CHECK-NEXT: [[TMP57:%.*]] = zext i32 [[TMP46]] to i64 +; CHECK-NEXT: [[TMP58:%.*]] = mul i64 [[TMP56]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = trunc i64 [[TMP58]] to i32 +; CHECK-NEXT: [[TMP60:%.*]] = lshr i64 [[TMP58]], 32 +; CHECK-NEXT: [[TMP61:%.*]] = trunc i64 [[TMP60]] to i32 +; CHECK-NEXT: [[TMP62:%.*]] = add i32 [[TMP46]], [[TMP61]] +; CHECK-NEXT: [[TMP63:%.*]] = sub i32 [[TMP46]], [[TMP61]] +; CHECK-NEXT: [[TMP64:%.*]] = select i1 [[TMP54]], i32 [[TMP62]], i32 [[TMP63]] +; CHECK-NEXT: [[TMP65:%.*]] = zext i32 [[TMP64]] to i64 +; CHECK-NEXT: [[TMP66:%.*]] = zext i32 [[TMP41]] to i64 +; CHECK-NEXT: [[TMP67:%.*]] = mul i64 [[TMP65]], [[TMP66]] +; CHECK-NEXT: [[TMP68:%.*]] = trunc i64 [[TMP67]] to i32 +; CHECK-NEXT: [[TMP69:%.*]] = lshr i64 [[TMP67]], 32 +; CHECK-NEXT: [[TMP70:%.*]] = trunc i64 [[TMP69]] to i32 +; CHECK-NEXT: [[TMP71:%.*]] = mul i32 [[TMP70]], [[TMP42]] +; CHECK-NEXT: [[TMP72:%.*]] = sub i32 [[TMP41]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = icmp uge i32 [[TMP72]], [[TMP42]] +; CHECK-NEXT: [[TMP74:%.*]] = icmp uge i32 [[TMP41]], [[TMP71]] +; CHECK-NEXT: [[TMP75:%.*]] = and i1 [[TMP73]], [[TMP74]] +; CHECK-NEXT: [[TMP76:%.*]] = add i32 [[TMP70]], 1 +; CHECK-NEXT: [[TMP77:%.*]] = sub i32 [[TMP70]], 1 +; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP75]], i32 [[TMP76]], i32 [[TMP70]] +; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP74]], i32 [[TMP78]], i32 [[TMP77]] +; CHECK-NEXT: [[TMP80:%.*]] = insertelement <2 x i32> [[TMP40]], i32 [[TMP79]], i64 1 +; CHECK-NEXT: store <2 x i32> [[TMP80]], <2 x i32> addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: udiv_v2i32_pow2_shl_denom: @@ -4569,60 +4503,54 @@ define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], [[TMP2]] ; CHECK-NEXT: 
[[TMP32:%.*]] = sub i32 [[TMP1]], [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = icmp uge i32 [[TMP32]], [[TMP2]] -; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP35:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] -; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP34]], [[TMP36]] -; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[TMP37]], 0 -; CHECK-NEXT: [[TMP39:%.*]] = sub i32 [[TMP32]], [[TMP2]] -; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[TMP32]], [[TMP2]] -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP38]], i32 [[TMP32]], i32 [[TMP39]] -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], i32 [[TMP41]], i32 [[TMP40]] -; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x i32> undef, i32 [[TMP42]], i64 0 -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[X]], i64 1 -; CHECK-NEXT: [[TMP45:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 1 -; CHECK-NEXT: [[TMP46:%.*]] = uitofp i32 [[TMP45]] to float -; CHECK-NEXT: [[TMP47:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP46]]) -; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP47]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP49:%.*]] = fptoui float [[TMP48]] to i32 -; CHECK-NEXT: [[TMP50:%.*]] = zext i32 [[TMP49]] to i64 -; CHECK-NEXT: [[TMP51:%.*]] = zext i32 [[TMP45]] to i64 -; CHECK-NEXT: [[TMP52:%.*]] = mul i64 [[TMP50]], [[TMP51]] -; CHECK-NEXT: [[TMP53:%.*]] = trunc i64 [[TMP52]] to i32 -; CHECK-NEXT: [[TMP54:%.*]] = lshr i64 [[TMP52]], 32 -; CHECK-NEXT: [[TMP55:%.*]] = trunc i64 [[TMP54]] to i32 -; CHECK-NEXT: [[TMP56:%.*]] = sub i32 0, [[TMP53]] -; CHECK-NEXT: [[TMP57:%.*]] = icmp eq i32 [[TMP55]], 0 -; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 [[TMP53]] -; CHECK-NEXT: [[TMP59:%.*]] = zext i32 [[TMP58]] to i64 -; CHECK-NEXT: [[TMP60:%.*]] = zext i32 [[TMP49]] to i64 -; CHECK-NEXT: [[TMP61:%.*]] = mul i64 [[TMP59]], [[TMP60]] -; CHECK-NEXT: [[TMP62:%.*]] = trunc i64 [[TMP61]] to i32 -; CHECK-NEXT: 
[[TMP63:%.*]] = lshr i64 [[TMP61]], 32 -; CHECK-NEXT: [[TMP64:%.*]] = trunc i64 [[TMP63]] to i32 -; CHECK-NEXT: [[TMP65:%.*]] = add i32 [[TMP49]], [[TMP64]] -; CHECK-NEXT: [[TMP66:%.*]] = sub i32 [[TMP49]], [[TMP64]] -; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP57]], i32 [[TMP65]], i32 [[TMP66]] -; CHECK-NEXT: [[TMP68:%.*]] = zext i32 [[TMP67]] to i64 -; CHECK-NEXT: [[TMP69:%.*]] = zext i32 [[TMP44]] to i64 -; CHECK-NEXT: [[TMP70:%.*]] = mul i64 [[TMP68]], [[TMP69]] -; CHECK-NEXT: [[TMP71:%.*]] = trunc i64 [[TMP70]] to i32 -; CHECK-NEXT: [[TMP72:%.*]] = lshr i64 [[TMP70]], 32 -; CHECK-NEXT: [[TMP73:%.*]] = trunc i64 [[TMP72]] to i32 -; CHECK-NEXT: [[TMP74:%.*]] = mul i32 [[TMP73]], [[TMP45]] -; CHECK-NEXT: [[TMP75:%.*]] = sub i32 [[TMP44]], [[TMP74]] -; CHECK-NEXT: [[TMP76:%.*]] = icmp uge i32 [[TMP75]], [[TMP45]] -; CHECK-NEXT: [[TMP77:%.*]] = select i1 [[TMP76]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP78:%.*]] = icmp uge i32 [[TMP44]], [[TMP74]] -; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP80:%.*]] = and i32 [[TMP77]], [[TMP79]] -; CHECK-NEXT: [[TMP81:%.*]] = icmp eq i32 [[TMP80]], 0 -; CHECK-NEXT: [[TMP82:%.*]] = sub i32 [[TMP75]], [[TMP45]] -; CHECK-NEXT: [[TMP83:%.*]] = add i32 [[TMP75]], [[TMP45]] -; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP81]], i32 [[TMP75]], i32 [[TMP82]] -; CHECK-NEXT: [[TMP85:%.*]] = select i1 [[TMP78]], i32 [[TMP84]], i32 [[TMP83]] -; CHECK-NEXT: [[TMP86:%.*]] = insertelement <2 x i32> [[TMP43]], i32 [[TMP85]], i64 1 -; CHECK-NEXT: store <2 x i32> [[TMP86]], <2 x i32> addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] +; CHECK-NEXT: [[TMP35:%.*]] = and i1 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = sub i32 [[TMP32]], [[TMP2]] +; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP32]], [[TMP2]] +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP35]], i32 [[TMP36]], i32 [[TMP32]] +; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP34]], i32 [[TMP38]], i32 [[TMP37]] +; 
CHECK-NEXT: [[TMP40:%.*]] = insertelement <2 x i32> undef, i32 [[TMP39]], i64 0 +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x i32> [[X]], i64 1 +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 1 +; CHECK-NEXT: [[TMP43:%.*]] = uitofp i32 [[TMP42]] to float +; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP43]]) +; CHECK-NEXT: [[TMP45:%.*]] = fmul fast float [[TMP44]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP46:%.*]] = fptoui float [[TMP45]] to i32 +; CHECK-NEXT: [[TMP47:%.*]] = zext i32 [[TMP46]] to i64 +; CHECK-NEXT: [[TMP48:%.*]] = zext i32 [[TMP42]] to i64 +; CHECK-NEXT: [[TMP49:%.*]] = mul i64 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = trunc i64 [[TMP49]] to i32 +; CHECK-NEXT: [[TMP51:%.*]] = lshr i64 [[TMP49]], 32 +; CHECK-NEXT: [[TMP52:%.*]] = trunc i64 [[TMP51]] to i32 +; CHECK-NEXT: [[TMP53:%.*]] = sub i32 0, [[TMP50]] +; CHECK-NEXT: [[TMP54:%.*]] = icmp eq i32 [[TMP52]], 0 +; CHECK-NEXT: [[TMP55:%.*]] = select i1 [[TMP54]], i32 [[TMP53]], i32 [[TMP50]] +; CHECK-NEXT: [[TMP56:%.*]] = zext i32 [[TMP55]] to i64 +; CHECK-NEXT: [[TMP57:%.*]] = zext i32 [[TMP46]] to i64 +; CHECK-NEXT: [[TMP58:%.*]] = mul i64 [[TMP56]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = trunc i64 [[TMP58]] to i32 +; CHECK-NEXT: [[TMP60:%.*]] = lshr i64 [[TMP58]], 32 +; CHECK-NEXT: [[TMP61:%.*]] = trunc i64 [[TMP60]] to i32 +; CHECK-NEXT: [[TMP62:%.*]] = add i32 [[TMP46]], [[TMP61]] +; CHECK-NEXT: [[TMP63:%.*]] = sub i32 [[TMP46]], [[TMP61]] +; CHECK-NEXT: [[TMP64:%.*]] = select i1 [[TMP54]], i32 [[TMP62]], i32 [[TMP63]] +; CHECK-NEXT: [[TMP65:%.*]] = zext i32 [[TMP64]] to i64 +; CHECK-NEXT: [[TMP66:%.*]] = zext i32 [[TMP41]] to i64 +; CHECK-NEXT: [[TMP67:%.*]] = mul i64 [[TMP65]], [[TMP66]] +; CHECK-NEXT: [[TMP68:%.*]] = trunc i64 [[TMP67]] to i32 +; CHECK-NEXT: [[TMP69:%.*]] = lshr i64 [[TMP67]], 32 +; CHECK-NEXT: [[TMP70:%.*]] = trunc i64 [[TMP69]] to i32 +; CHECK-NEXT: [[TMP71:%.*]] = mul i32 [[TMP70]], [[TMP42]] +; 
CHECK-NEXT: [[TMP72:%.*]] = sub i32 [[TMP41]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = icmp uge i32 [[TMP72]], [[TMP42]] +; CHECK-NEXT: [[TMP74:%.*]] = icmp uge i32 [[TMP41]], [[TMP71]] +; CHECK-NEXT: [[TMP75:%.*]] = and i1 [[TMP73]], [[TMP74]] +; CHECK-NEXT: [[TMP76:%.*]] = sub i32 [[TMP72]], [[TMP42]] +; CHECK-NEXT: [[TMP77:%.*]] = add i32 [[TMP72]], [[TMP42]] +; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP75]], i32 [[TMP76]], i32 [[TMP72]] +; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP74]], i32 [[TMP78]], i32 [[TMP77]] +; CHECK-NEXT: [[TMP80:%.*]] = insertelement <2 x i32> [[TMP40]], i32 [[TMP79]], i64 1 +; CHECK-NEXT: store <2 x i32> [[TMP80]], <2 x i32> addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: urem_v2i32_pow2_shl_denom: @@ -4912,71 +4840,65 @@ define amdgpu_kernel void @sdiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; CHECK-NEXT: [[TMP38:%.*]] = mul i32 [[TMP37]], [[TMP9]] ; CHECK-NEXT: [[TMP39:%.*]] = sub i32 [[TMP8]], [[TMP38]] ; CHECK-NEXT: [[TMP40:%.*]] = icmp uge i32 [[TMP39]], [[TMP9]] -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP42:%.*]] = icmp uge i32 [[TMP8]], [[TMP38]] -; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP41]], [[TMP43]] -; CHECK-NEXT: [[TMP45:%.*]] = icmp eq i32 [[TMP44]], 0 -; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP37]], 1 -; CHECK-NEXT: [[TMP47:%.*]] = sub i32 [[TMP37]], 1 -; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP45]], i32 [[TMP37]], i32 [[TMP46]] -; CHECK-NEXT: [[TMP49:%.*]] = select i1 [[TMP42]], i32 [[TMP48]], i32 [[TMP47]] -; CHECK-NEXT: [[TMP50:%.*]] = xor i32 [[TMP49]], [[TMP5]] -; CHECK-NEXT: [[TMP51:%.*]] = sub i32 [[TMP50]], [[TMP5]] -; CHECK-NEXT: [[TMP52:%.*]] = insertelement <2 x i32> undef, i32 [[TMP51]], i64 0 -; CHECK-NEXT: [[TMP53:%.*]] = extractelement <2 x i32> [[X]], i64 1 -; CHECK-NEXT: [[TMP54:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 1 -; CHECK-NEXT: [[TMP55:%.*]] = ashr 
i32 [[TMP53]], 31 -; CHECK-NEXT: [[TMP56:%.*]] = ashr i32 [[TMP54]], 31 -; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]] -; CHECK-NEXT: [[TMP58:%.*]] = add i32 [[TMP53]], [[TMP55]] -; CHECK-NEXT: [[TMP59:%.*]] = add i32 [[TMP54]], [[TMP56]] -; CHECK-NEXT: [[TMP60:%.*]] = xor i32 [[TMP58]], [[TMP55]] -; CHECK-NEXT: [[TMP61:%.*]] = xor i32 [[TMP59]], [[TMP56]] -; CHECK-NEXT: [[TMP62:%.*]] = uitofp i32 [[TMP61]] to float -; CHECK-NEXT: [[TMP63:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP62]]) -; CHECK-NEXT: [[TMP64:%.*]] = fmul fast float [[TMP63]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP65:%.*]] = fptoui float [[TMP64]] to i32 -; CHECK-NEXT: [[TMP66:%.*]] = zext i32 [[TMP65]] to i64 -; CHECK-NEXT: [[TMP67:%.*]] = zext i32 [[TMP61]] to i64 -; CHECK-NEXT: [[TMP68:%.*]] = mul i64 [[TMP66]], [[TMP67]] -; CHECK-NEXT: [[TMP69:%.*]] = trunc i64 [[TMP68]] to i32 -; CHECK-NEXT: [[TMP70:%.*]] = lshr i64 [[TMP68]], 32 -; CHECK-NEXT: [[TMP71:%.*]] = trunc i64 [[TMP70]] to i32 -; CHECK-NEXT: [[TMP72:%.*]] = sub i32 0, [[TMP69]] -; CHECK-NEXT: [[TMP73:%.*]] = icmp eq i32 [[TMP71]], 0 -; CHECK-NEXT: [[TMP74:%.*]] = select i1 [[TMP73]], i32 [[TMP72]], i32 [[TMP69]] -; CHECK-NEXT: [[TMP75:%.*]] = zext i32 [[TMP74]] to i64 -; CHECK-NEXT: [[TMP76:%.*]] = zext i32 [[TMP65]] to i64 -; CHECK-NEXT: [[TMP77:%.*]] = mul i64 [[TMP75]], [[TMP76]] -; CHECK-NEXT: [[TMP78:%.*]] = trunc i64 [[TMP77]] to i32 -; CHECK-NEXT: [[TMP79:%.*]] = lshr i64 [[TMP77]], 32 -; CHECK-NEXT: [[TMP80:%.*]] = trunc i64 [[TMP79]] to i32 -; CHECK-NEXT: [[TMP81:%.*]] = add i32 [[TMP65]], [[TMP80]] -; CHECK-NEXT: [[TMP82:%.*]] = sub i32 [[TMP65]], [[TMP80]] -; CHECK-NEXT: [[TMP83:%.*]] = select i1 [[TMP73]], i32 [[TMP81]], i32 [[TMP82]] -; CHECK-NEXT: [[TMP84:%.*]] = zext i32 [[TMP83]] to i64 -; CHECK-NEXT: [[TMP85:%.*]] = zext i32 [[TMP60]] to i64 -; CHECK-NEXT: [[TMP86:%.*]] = mul i64 [[TMP84]], [[TMP85]] -; CHECK-NEXT: [[TMP87:%.*]] = trunc i64 [[TMP86]] to i32 -; CHECK-NEXT: [[TMP88:%.*]] = 
lshr i64 [[TMP86]], 32 -; CHECK-NEXT: [[TMP89:%.*]] = trunc i64 [[TMP88]] to i32 -; CHECK-NEXT: [[TMP90:%.*]] = mul i32 [[TMP89]], [[TMP61]] -; CHECK-NEXT: [[TMP91:%.*]] = sub i32 [[TMP60]], [[TMP90]] -; CHECK-NEXT: [[TMP92:%.*]] = icmp uge i32 [[TMP91]], [[TMP61]] -; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP94:%.*]] = icmp uge i32 [[TMP60]], [[TMP90]] -; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP96:%.*]] = and i32 [[TMP93]], [[TMP95]] -; CHECK-NEXT: [[TMP97:%.*]] = icmp eq i32 [[TMP96]], 0 -; CHECK-NEXT: [[TMP98:%.*]] = add i32 [[TMP89]], 1 -; CHECK-NEXT: [[TMP99:%.*]] = sub i32 [[TMP89]], 1 -; CHECK-NEXT: [[TMP100:%.*]] = select i1 [[TMP97]], i32 [[TMP89]], i32 [[TMP98]] -; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP94]], i32 [[TMP100]], i32 [[TMP99]] -; CHECK-NEXT: [[TMP102:%.*]] = xor i32 [[TMP101]], [[TMP57]] -; CHECK-NEXT: [[TMP103:%.*]] = sub i32 [[TMP102]], [[TMP57]] -; CHECK-NEXT: [[TMP104:%.*]] = insertelement <2 x i32> [[TMP52]], i32 [[TMP103]], i64 1 -; CHECK-NEXT: store <2 x i32> [[TMP104]], <2 x i32> addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP41:%.*]] = icmp uge i32 [[TMP8]], [[TMP38]] +; CHECK-NEXT: [[TMP42:%.*]] = and i1 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP37]], 1 +; CHECK-NEXT: [[TMP44:%.*]] = sub i32 [[TMP37]], 1 +; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP42]], i32 [[TMP43]], i32 [[TMP37]] +; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP41]], i32 [[TMP45]], i32 [[TMP44]] +; CHECK-NEXT: [[TMP47:%.*]] = xor i32 [[TMP46]], [[TMP5]] +; CHECK-NEXT: [[TMP48:%.*]] = sub i32 [[TMP47]], [[TMP5]] +; CHECK-NEXT: [[TMP49:%.*]] = insertelement <2 x i32> undef, i32 [[TMP48]], i64 0 +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <2 x i32> [[X]], i64 1 +; CHECK-NEXT: [[TMP51:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 1 +; CHECK-NEXT: [[TMP52:%.*]] = ashr i32 [[TMP50]], 31 +; CHECK-NEXT: [[TMP53:%.*]] = ashr i32 [[TMP51]], 31 +; CHECK-NEXT: 
[[TMP54:%.*]] = xor i32 [[TMP52]], [[TMP53]] +; CHECK-NEXT: [[TMP55:%.*]] = add i32 [[TMP50]], [[TMP52]] +; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[TMP51]], [[TMP53]] +; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP52]] +; CHECK-NEXT: [[TMP58:%.*]] = xor i32 [[TMP56]], [[TMP53]] +; CHECK-NEXT: [[TMP59:%.*]] = uitofp i32 [[TMP58]] to float +; CHECK-NEXT: [[TMP60:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP59]]) +; CHECK-NEXT: [[TMP61:%.*]] = fmul fast float [[TMP60]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP62:%.*]] = fptoui float [[TMP61]] to i32 +; CHECK-NEXT: [[TMP63:%.*]] = zext i32 [[TMP62]] to i64 +; CHECK-NEXT: [[TMP64:%.*]] = zext i32 [[TMP58]] to i64 +; CHECK-NEXT: [[TMP65:%.*]] = mul i64 [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP66:%.*]] = trunc i64 [[TMP65]] to i32 +; CHECK-NEXT: [[TMP67:%.*]] = lshr i64 [[TMP65]], 32 +; CHECK-NEXT: [[TMP68:%.*]] = trunc i64 [[TMP67]] to i32 +; CHECK-NEXT: [[TMP69:%.*]] = sub i32 0, [[TMP66]] +; CHECK-NEXT: [[TMP70:%.*]] = icmp eq i32 [[TMP68]], 0 +; CHECK-NEXT: [[TMP71:%.*]] = select i1 [[TMP70]], i32 [[TMP69]], i32 [[TMP66]] +; CHECK-NEXT: [[TMP72:%.*]] = zext i32 [[TMP71]] to i64 +; CHECK-NEXT: [[TMP73:%.*]] = zext i32 [[TMP62]] to i64 +; CHECK-NEXT: [[TMP74:%.*]] = mul i64 [[TMP72]], [[TMP73]] +; CHECK-NEXT: [[TMP75:%.*]] = trunc i64 [[TMP74]] to i32 +; CHECK-NEXT: [[TMP76:%.*]] = lshr i64 [[TMP74]], 32 +; CHECK-NEXT: [[TMP77:%.*]] = trunc i64 [[TMP76]] to i32 +; CHECK-NEXT: [[TMP78:%.*]] = add i32 [[TMP62]], [[TMP77]] +; CHECK-NEXT: [[TMP79:%.*]] = sub i32 [[TMP62]], [[TMP77]] +; CHECK-NEXT: [[TMP80:%.*]] = select i1 [[TMP70]], i32 [[TMP78]], i32 [[TMP79]] +; CHECK-NEXT: [[TMP81:%.*]] = zext i32 [[TMP80]] to i64 +; CHECK-NEXT: [[TMP82:%.*]] = zext i32 [[TMP57]] to i64 +; CHECK-NEXT: [[TMP83:%.*]] = mul i64 [[TMP81]], [[TMP82]] +; CHECK-NEXT: [[TMP84:%.*]] = trunc i64 [[TMP83]] to i32 +; CHECK-NEXT: [[TMP85:%.*]] = lshr i64 [[TMP83]], 32 +; CHECK-NEXT: [[TMP86:%.*]] = trunc i64 [[TMP85]] to i32 +; 
CHECK-NEXT: [[TMP87:%.*]] = mul i32 [[TMP86]], [[TMP58]] +; CHECK-NEXT: [[TMP88:%.*]] = sub i32 [[TMP57]], [[TMP87]] +; CHECK-NEXT: [[TMP89:%.*]] = icmp uge i32 [[TMP88]], [[TMP58]] +; CHECK-NEXT: [[TMP90:%.*]] = icmp uge i32 [[TMP57]], [[TMP87]] +; CHECK-NEXT: [[TMP91:%.*]] = and i1 [[TMP89]], [[TMP90]] +; CHECK-NEXT: [[TMP92:%.*]] = add i32 [[TMP86]], 1 +; CHECK-NEXT: [[TMP93:%.*]] = sub i32 [[TMP86]], 1 +; CHECK-NEXT: [[TMP94:%.*]] = select i1 [[TMP91]], i32 [[TMP92]], i32 [[TMP86]] +; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP90]], i32 [[TMP94]], i32 [[TMP93]] +; CHECK-NEXT: [[TMP96:%.*]] = xor i32 [[TMP95]], [[TMP54]] +; CHECK-NEXT: [[TMP97:%.*]] = sub i32 [[TMP96]], [[TMP54]] +; CHECK-NEXT: [[TMP98:%.*]] = insertelement <2 x i32> [[TMP49]], i32 [[TMP97]], i64 1 +; CHECK-NEXT: store <2 x i32> [[TMP98]], <2 x i32> addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: sdiv_v2i32_pow2_shl_denom: @@ -5252,70 +5174,64 @@ define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; CHECK-NEXT: [[TMP37:%.*]] = mul i32 [[TMP36]], [[TMP8]] ; CHECK-NEXT: [[TMP38:%.*]] = sub i32 [[TMP7]], [[TMP37]] ; CHECK-NEXT: [[TMP39:%.*]] = icmp uge i32 [[TMP38]], [[TMP8]] -; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP41:%.*]] = icmp uge i32 [[TMP7]], [[TMP37]] -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP40]], [[TMP42]] -; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], 0 -; CHECK-NEXT: [[TMP45:%.*]] = sub i32 [[TMP38]], [[TMP8]] -; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP38]], [[TMP8]] -; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP44]], i32 [[TMP38]], i32 [[TMP45]] -; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP41]], i32 [[TMP47]], i32 [[TMP46]] -; CHECK-NEXT: [[TMP49:%.*]] = xor i32 [[TMP48]], [[TMP3]] -; CHECK-NEXT: [[TMP50:%.*]] = sub i32 [[TMP49]], [[TMP3]] -; CHECK-NEXT: [[TMP51:%.*]] = insertelement <2 x i32> undef, i32 [[TMP50]], i64 0 
-; CHECK-NEXT: [[TMP52:%.*]] = extractelement <2 x i32> [[X]], i64 1 -; CHECK-NEXT: [[TMP53:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 1 -; CHECK-NEXT: [[TMP54:%.*]] = ashr i32 [[TMP52]], 31 -; CHECK-NEXT: [[TMP55:%.*]] = ashr i32 [[TMP53]], 31 -; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[TMP52]], [[TMP54]] -; CHECK-NEXT: [[TMP57:%.*]] = add i32 [[TMP53]], [[TMP55]] -; CHECK-NEXT: [[TMP58:%.*]] = xor i32 [[TMP56]], [[TMP54]] -; CHECK-NEXT: [[TMP59:%.*]] = xor i32 [[TMP57]], [[TMP55]] -; CHECK-NEXT: [[TMP60:%.*]] = uitofp i32 [[TMP59]] to float -; CHECK-NEXT: [[TMP61:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP60]]) -; CHECK-NEXT: [[TMP62:%.*]] = fmul fast float [[TMP61]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP63:%.*]] = fptoui float [[TMP62]] to i32 -; CHECK-NEXT: [[TMP64:%.*]] = zext i32 [[TMP63]] to i64 -; CHECK-NEXT: [[TMP65:%.*]] = zext i32 [[TMP59]] to i64 -; CHECK-NEXT: [[TMP66:%.*]] = mul i64 [[TMP64]], [[TMP65]] -; CHECK-NEXT: [[TMP67:%.*]] = trunc i64 [[TMP66]] to i32 -; CHECK-NEXT: [[TMP68:%.*]] = lshr i64 [[TMP66]], 32 -; CHECK-NEXT: [[TMP69:%.*]] = trunc i64 [[TMP68]] to i32 -; CHECK-NEXT: [[TMP70:%.*]] = sub i32 0, [[TMP67]] -; CHECK-NEXT: [[TMP71:%.*]] = icmp eq i32 [[TMP69]], 0 -; CHECK-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[TMP70]], i32 [[TMP67]] -; CHECK-NEXT: [[TMP73:%.*]] = zext i32 [[TMP72]] to i64 -; CHECK-NEXT: [[TMP74:%.*]] = zext i32 [[TMP63]] to i64 -; CHECK-NEXT: [[TMP75:%.*]] = mul i64 [[TMP73]], [[TMP74]] -; CHECK-NEXT: [[TMP76:%.*]] = trunc i64 [[TMP75]] to i32 -; CHECK-NEXT: [[TMP77:%.*]] = lshr i64 [[TMP75]], 32 -; CHECK-NEXT: [[TMP78:%.*]] = trunc i64 [[TMP77]] to i32 -; CHECK-NEXT: [[TMP79:%.*]] = add i32 [[TMP63]], [[TMP78]] -; CHECK-NEXT: [[TMP80:%.*]] = sub i32 [[TMP63]], [[TMP78]] -; CHECK-NEXT: [[TMP81:%.*]] = select i1 [[TMP71]], i32 [[TMP79]], i32 [[TMP80]] -; CHECK-NEXT: [[TMP82:%.*]] = zext i32 [[TMP81]] to i64 -; CHECK-NEXT: [[TMP83:%.*]] = zext i32 [[TMP58]] to i64 -; CHECK-NEXT: [[TMP84:%.*]] = 
mul i64 [[TMP82]], [[TMP83]] -; CHECK-NEXT: [[TMP85:%.*]] = trunc i64 [[TMP84]] to i32 -; CHECK-NEXT: [[TMP86:%.*]] = lshr i64 [[TMP84]], 32 -; CHECK-NEXT: [[TMP87:%.*]] = trunc i64 [[TMP86]] to i32 -; CHECK-NEXT: [[TMP88:%.*]] = mul i32 [[TMP87]], [[TMP59]] -; CHECK-NEXT: [[TMP89:%.*]] = sub i32 [[TMP58]], [[TMP88]] -; CHECK-NEXT: [[TMP90:%.*]] = icmp uge i32 [[TMP89]], [[TMP59]] -; CHECK-NEXT: [[TMP91:%.*]] = select i1 [[TMP90]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP92:%.*]] = icmp uge i32 [[TMP58]], [[TMP88]] -; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP94:%.*]] = and i32 [[TMP91]], [[TMP93]] -; CHECK-NEXT: [[TMP95:%.*]] = icmp eq i32 [[TMP94]], 0 -; CHECK-NEXT: [[TMP96:%.*]] = sub i32 [[TMP89]], [[TMP59]] -; CHECK-NEXT: [[TMP97:%.*]] = add i32 [[TMP89]], [[TMP59]] -; CHECK-NEXT: [[TMP98:%.*]] = select i1 [[TMP95]], i32 [[TMP89]], i32 [[TMP96]] -; CHECK-NEXT: [[TMP99:%.*]] = select i1 [[TMP92]], i32 [[TMP98]], i32 [[TMP97]] -; CHECK-NEXT: [[TMP100:%.*]] = xor i32 [[TMP99]], [[TMP54]] -; CHECK-NEXT: [[TMP101:%.*]] = sub i32 [[TMP100]], [[TMP54]] -; CHECK-NEXT: [[TMP102:%.*]] = insertelement <2 x i32> [[TMP51]], i32 [[TMP101]], i64 1 -; CHECK-NEXT: store <2 x i32> [[TMP102]], <2 x i32> addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP40:%.*]] = icmp uge i32 [[TMP7]], [[TMP37]] +; CHECK-NEXT: [[TMP41:%.*]] = and i1 [[TMP39]], [[TMP40]] +; CHECK-NEXT: [[TMP42:%.*]] = sub i32 [[TMP38]], [[TMP8]] +; CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP38]], [[TMP8]] +; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP41]], i32 [[TMP42]], i32 [[TMP38]] +; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP40]], i32 [[TMP44]], i32 [[TMP43]] +; CHECK-NEXT: [[TMP46:%.*]] = xor i32 [[TMP45]], [[TMP3]] +; CHECK-NEXT: [[TMP47:%.*]] = sub i32 [[TMP46]], [[TMP3]] +; CHECK-NEXT: [[TMP48:%.*]] = insertelement <2 x i32> undef, i32 [[TMP47]], i64 0 +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <2 x i32> [[X]], i64 1 +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <2 x i32> 
[[SHL_Y]], i64 1 +; CHECK-NEXT: [[TMP51:%.*]] = ashr i32 [[TMP49]], 31 +; CHECK-NEXT: [[TMP52:%.*]] = ashr i32 [[TMP50]], 31 +; CHECK-NEXT: [[TMP53:%.*]] = add i32 [[TMP49]], [[TMP51]] +; CHECK-NEXT: [[TMP54:%.*]] = add i32 [[TMP50]], [[TMP52]] +; CHECK-NEXT: [[TMP55:%.*]] = xor i32 [[TMP53]], [[TMP51]] +; CHECK-NEXT: [[TMP56:%.*]] = xor i32 [[TMP54]], [[TMP52]] +; CHECK-NEXT: [[TMP57:%.*]] = uitofp i32 [[TMP56]] to float +; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP57]]) +; CHECK-NEXT: [[TMP59:%.*]] = fmul fast float [[TMP58]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP60:%.*]] = fptoui float [[TMP59]] to i32 +; CHECK-NEXT: [[TMP61:%.*]] = zext i32 [[TMP60]] to i64 +; CHECK-NEXT: [[TMP62:%.*]] = zext i32 [[TMP56]] to i64 +; CHECK-NEXT: [[TMP63:%.*]] = mul i64 [[TMP61]], [[TMP62]] +; CHECK-NEXT: [[TMP64:%.*]] = trunc i64 [[TMP63]] to i32 +; CHECK-NEXT: [[TMP65:%.*]] = lshr i64 [[TMP63]], 32 +; CHECK-NEXT: [[TMP66:%.*]] = trunc i64 [[TMP65]] to i32 +; CHECK-NEXT: [[TMP67:%.*]] = sub i32 0, [[TMP64]] +; CHECK-NEXT: [[TMP68:%.*]] = icmp eq i32 [[TMP66]], 0 +; CHECK-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], i32 [[TMP67]], i32 [[TMP64]] +; CHECK-NEXT: [[TMP70:%.*]] = zext i32 [[TMP69]] to i64 +; CHECK-NEXT: [[TMP71:%.*]] = zext i32 [[TMP60]] to i64 +; CHECK-NEXT: [[TMP72:%.*]] = mul i64 [[TMP70]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = trunc i64 [[TMP72]] to i32 +; CHECK-NEXT: [[TMP74:%.*]] = lshr i64 [[TMP72]], 32 +; CHECK-NEXT: [[TMP75:%.*]] = trunc i64 [[TMP74]] to i32 +; CHECK-NEXT: [[TMP76:%.*]] = add i32 [[TMP60]], [[TMP75]] +; CHECK-NEXT: [[TMP77:%.*]] = sub i32 [[TMP60]], [[TMP75]] +; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP68]], i32 [[TMP76]], i32 [[TMP77]] +; CHECK-NEXT: [[TMP79:%.*]] = zext i32 [[TMP78]] to i64 +; CHECK-NEXT: [[TMP80:%.*]] = zext i32 [[TMP55]] to i64 +; CHECK-NEXT: [[TMP81:%.*]] = mul i64 [[TMP79]], [[TMP80]] +; CHECK-NEXT: [[TMP82:%.*]] = trunc i64 [[TMP81]] to i32 +; CHECK-NEXT: [[TMP83:%.*]] = lshr i64 
[[TMP81]], 32 +; CHECK-NEXT: [[TMP84:%.*]] = trunc i64 [[TMP83]] to i32 +; CHECK-NEXT: [[TMP85:%.*]] = mul i32 [[TMP84]], [[TMP56]] +; CHECK-NEXT: [[TMP86:%.*]] = sub i32 [[TMP55]], [[TMP85]] +; CHECK-NEXT: [[TMP87:%.*]] = icmp uge i32 [[TMP86]], [[TMP56]] +; CHECK-NEXT: [[TMP88:%.*]] = icmp uge i32 [[TMP55]], [[TMP85]] +; CHECK-NEXT: [[TMP89:%.*]] = and i1 [[TMP87]], [[TMP88]] +; CHECK-NEXT: [[TMP90:%.*]] = sub i32 [[TMP86]], [[TMP56]] +; CHECK-NEXT: [[TMP91:%.*]] = add i32 [[TMP86]], [[TMP56]] +; CHECK-NEXT: [[TMP92:%.*]] = select i1 [[TMP89]], i32 [[TMP90]], i32 [[TMP86]] +; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP88]], i32 [[TMP92]], i32 [[TMP91]] +; CHECK-NEXT: [[TMP94:%.*]] = xor i32 [[TMP93]], [[TMP51]] +; CHECK-NEXT: [[TMP95:%.*]] = sub i32 [[TMP94]], [[TMP51]] +; CHECK-NEXT: [[TMP96:%.*]] = insertelement <2 x i32> [[TMP48]], i32 [[TMP95]], i64 1 +; CHECK-NEXT: store <2 x i32> [[TMP96]], <2 x i32> addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: srem_v2i32_pow2_shl_denom: -- 2.7.4