From 858fe8664eddd6d73241c1103213a00268528597 Mon Sep 17 00:00:00 2001
From: Nuno Lopes
Date: Thu, 1 Sep 2022 17:04:26 +0100
Subject: [PATCH] Expand Div/Rem: consider the case where the dividend is zero

So we can't use ctlz in poison-producing mode
---
 llvm/lib/Transforms/Utils/IntegerDivision.cpp     |  8 +--
 llvm/test/CodeGen/AMDGPU/sdiv64.ll                | 55 +++++++++----------
 llvm/test/CodeGen/AMDGPU/srem64.ll                | 64 +++++++++++------------
 llvm/test/CodeGen/AMDGPU/udiv64.ll                | 60 ++++++++++-----------
 llvm/test/CodeGen/AMDGPU/urem64.ll                | 57 ++++++++++----------
 llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll |  4 +-
 llvm/test/Transforms/ExpandLargeDivRem/srem129.ll |  4 +-
 llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll |  4 +-
 llvm/test/Transforms/ExpandLargeDivRem/urem129.ll |  4 +-
 9 files changed, 123 insertions(+), 137 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/llvm/lib/Transforms/Utils/IntegerDivision.cpp
index 1267360..1750b5c 100644
--- a/llvm/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/llvm/lib/Transforms/Utils/IntegerDivision.cpp
@@ -214,10 +214,10 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
   // ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true)
   // ; %sr = sub nsw i32 %tmp0, %tmp1
   // ; %ret0_4 = icmp ugt i32 %sr, 31
-  // ; %ret0 = or i1 %ret0_3, %ret0_4
+  // ; %ret0 = select i1 %ret0_3, i1 true, i1 %ret0_4
   // ; %retDividend = icmp eq i32 %sr, 31
   // ; %retVal = select i1 %ret0, i32 0, i32 %dividend
-  // ; %earlyRet = or i1 %ret0, %retDividend
+  // ; %earlyRet = select i1 %ret0, i1 true, i1 %retDividend
   // ; br i1 %earlyRet, label %end, label %bb1
   Builder.SetInsertPoint(SpecialCases);
   Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero);
@@ -227,10 +227,10 @@
   Value *Tmp1 = Builder.CreateCall(CTLZ, {Dividend, True});
   Value *SR = Builder.CreateSub(Tmp0, Tmp1);
   Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB);
-  Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4);
+  Value *Ret0 = Builder.CreateLogicalOr(Ret0_3, Ret0_4);
   Value *RetDividend = Builder.CreateICmpEQ(SR, MSB);
   Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend);
-  Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend);
+  Value *EarlyRet = Builder.CreateLogicalOr(Ret0, RetDividend);
   Builder.CreateCondBr(EarlyRet, End, BB1);
 
   // ; bb1: ; preds = %special-cases
diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
index afd429d..1d3ae98 100644
--- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
@@ -168,12 +168,11 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ; GCN-IR-NEXT: s_sub_u32 s10, s14, s18
 ; GCN-IR-NEXT: s_subb_u32 s11, 0, 0
 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[20:21], s[10:11], 63
-; GCN-IR-NEXT: s_mov_b32 s15, 0
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[22:23], s[10:11], 63
 ; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[20:21]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[20:21], s[10:11], 63
-; GCN-IR-NEXT: s_xor_b64 s[22:23], s[16:17], -1
-; GCN-IR-NEXT: s_and_b64 s[20:21], s[22:23], s[20:21]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[20:21]
+; GCN-IR-NEXT: s_or_b64 s[20:21], s[16:17], s[22:23]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[20:21]
+; GCN-IR-NEXT: s_mov_b32 s15, 0
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s16, s10, 1
@@ -524,7 +523,7 @@ define amdgpu_kernel void @s_test_sdiv24_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: v_mov_b32_e32 v3, s4
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24
 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -694,7 +693,7 @@ define amdgpu_kernel void @s_test_sdiv31_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: v_mov_b32_e32 v3, s4
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 31
 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -761,7 +760,7 @@ define amdgpu_kernel void @s_test_sdiv23_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: v_mov_b32_e32 v3, s4
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 23
 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -828,7 +827,7 @@ define amdgpu_kernel void @s_test_sdiv25_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: v_mov_b32_e32 v3, s4
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 25
 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -910,7 +909,7 @@ define amdgpu_kernel void @s_test_sdiv24_v2i64(<2 x i64> addrspace(1)* %out, <2
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GCN-IR-NEXT: s_ashr_i64 s[10:11], s[10:11], 40
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
 ; GCN-IR-NEXT: v_cvt_f32_i32_e32 v2, s10
 ; GCN-IR-NEXT: s_ashr_i64 s[6:7], s[6:7], 40
 ; GCN-IR-NEXT: v_cvt_f32_i32_e32 v3, s6
@@ -926,7 +925,7 @@ define amdgpu_kernel void @s_test_sdiv24_v2i64(<2 x i64> addrspace(1)* %out, <2
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v2|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc
 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24
-; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, v2, v4
+; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, v4, v2
 ; GCN-IR-NEXT: v_bfe_i32 v2, v2, 0, 24
 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v3, 31, v2
@@ -1008,12 +1007,11 @@ define amdgpu_kernel void @s_test_sdiv24_48(i48 addrspace(1)* %out, i48 %x, i48
 ; GCN-IR-NEXT: s_sub_u32 s10, s14, s18
 ; GCN-IR-NEXT: s_subb_u32 s11, 0, 0
 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[20:21], s[10:11], 63
-; GCN-IR-NEXT: s_mov_b32 s15, 0
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[22:23], s[10:11], 63
 ; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[20:21]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[20:21], s[10:11], 63
-; GCN-IR-NEXT: s_xor_b64 s[22:23], s[16:17], -1
-; GCN-IR-NEXT: s_and_b64 s[20:21], s[22:23], s[20:21]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[20:21]
+; GCN-IR-NEXT: s_or_b64 s[20:21], s[16:17], s[22:23]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[20:21]
+; GCN-IR-NEXT: s_mov_b32 s15, 0
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB9_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s16, s10, 1
@@ -1208,20 +1206,19 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-IR-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
 ; GCN-IR-NEXT: s_sub_u32 s2, s2, s4
 ; GCN-IR-NEXT: s_subb_u32 s3, s3, s4
-; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2
-; GCN-IR-NEXT: s_add_i32 s6, s6, 32
-; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3
-; GCN-IR-NEXT: s_min_u32 s10, s6, s7
+; GCN-IR-NEXT: s_flbit_i32_b32 s8, s2
+; GCN-IR-NEXT: s_add_i32 s8, s8, 32
+; GCN-IR-NEXT: s_flbit_i32_b32 s9, s3
+; GCN-IR-NEXT: s_min_u32 s10, s8, s9
 ; GCN-IR-NEXT: s_add_u32 s8, s10, 0xffffffc5
 ; GCN-IR-NEXT: s_addc_u32 s9, 0, -1
-; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[2:3], 0
-; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[14:15], s[8:9], 63
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[2:3], 0
+; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[8:9], 63
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[8:9], 63
+; GCN-IR-NEXT: s_or_b64 s[12:13], s[6:7], s[12:13]
+; GCN-IR-NEXT: s_or_b64 s[6:7], s[12:13], s[14:15]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[6:7]
 ; GCN-IR-NEXT: s_mov_b64 s[6:7], 0
-; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[14:15], s[8:9], 63
-; GCN-IR-NEXT: s_xor_b64 s[16:17], s[12:13], -1
-; GCN-IR-NEXT: s_and_b64 s[14:15], s[16:17], s[14:15]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[14:15]
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s12, s8, 1
@@ -1823,7 +1820,7 @@ define amdgpu_kernel void @s_test_sdiv24_k_num_i64(i64 addrspace(1)* %out, i64 %
 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v1, v1
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v1, v0
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1
 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24
 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -1880,7 +1877,7 @@ define amdgpu_kernel void @s_test_sdiv24_k_den_i64(i64 addrspace(1)* %out, i64 %
 ; GCN-IR-NEXT: v_mov_b32_e32 v2, s0
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s8
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v1, v0
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1
 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24
 ; GCN-IR-NEXT: s_mov_b32 s5, s1
 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0
diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll
index 1c62354..2fb2db8 100644
--- a/llvm/test/CodeGen/AMDGPU/srem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/srem64.ll
@@ -140,12 +140,11 @@ define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ; GCN-IR-NEXT: s_sub_u32 s8, s10, s14
 ; GCN-IR-NEXT: s_subb_u32 s9, 0, 0
 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[8:9], 63
-; GCN-IR-NEXT: s_mov_b32 s11, 0
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[8:9], 63
 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[16:17], s[8:9], 63
-; GCN-IR-NEXT: s_xor_b64 s[18:19], s[12:13], -1
-; GCN-IR-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17]
+; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17]
+; GCN-IR-NEXT: s_mov_b32 s11, 0
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s12, s8, 1
@@ -202,8 +201,8 @@ define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ; GCN-IR-NEXT: v_mul_lo_u32 v3, s5, v0
 ; GCN-IR-NEXT: v_mul_lo_u32 v0, s4, v0
 ; GCN-IR-NEXT: s_mov_b32 s11, 0xf000
-; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v2, v1
-; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v2
+; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v3, v1
 ; GCN-IR-NEXT: v_mov_b32_e32 v2, s3
 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
 ; GCN-IR-NEXT: s_mov_b32 s10, -1
@@ -505,7 +504,7 @@ define amdgpu_kernel void @s_test_srem23_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
 ; GCN-IR-NEXT: s_mov_b32 s5, s1
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
 ; GCN-IR-NEXT: s_mov_b32 s4, s0
 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
@@ -576,7 +575,7 @@ define amdgpu_kernel void @s_test_srem24_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
 ; GCN-IR-NEXT: s_mov_b32 s5, s1
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
 ; GCN-IR-NEXT: s_mov_b32 s4, s0
 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
@@ -701,7 +700,7 @@ define amdgpu_kernel void @s_test_srem25_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
 ; GCN-IR-NEXT: s_mov_b32 s5, s1
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
 ; GCN-IR-NEXT: s_mov_b32 s4, s0
 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
@@ -839,7 +838,7 @@ define amdgpu_kernel void @s_test_srem32_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v2, v2
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
 ; GCN-IR-NEXT: s_mov_b32 s4, s0
 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s3, v0
@@ -1021,12 +1020,11 @@ define amdgpu_kernel void @s_test_srem33_64(i64 addrspace(1)* %out, i64 %x, i64
 ; GCN-IR-NEXT: s_sub_u32 s10, s12, s16
 ; GCN-IR-NEXT: s_subb_u32 s11, 0, 0
 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[18:19], s[10:11], 63
-; GCN-IR-NEXT: s_mov_b32 s13, 0
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[10:11], 63
 ; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[18:19]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[18:19], s[10:11], 63
-; GCN-IR-NEXT: s_xor_b64 s[20:21], s[14:15], -1
-; GCN-IR-NEXT: s_and_b64 s[18:19], s[20:21], s[18:19]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[18:19]
+; GCN-IR-NEXT: s_or_b64 s[18:19], s[14:15], s[20:21]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[18:19]
+; GCN-IR-NEXT: s_mov_b32 s13, 0
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s14, s10, 1
@@ -1174,12 +1172,11 @@ define amdgpu_kernel void @s_test_srem24_48(i48 addrspace(1)* %out, i48 %x, i48
 ; GCN-IR-NEXT: s_sub_u32 s10, s12, s16
 ; GCN-IR-NEXT: s_subb_u32 s11, 0, 0
 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[18:19], s[10:11], 63
-; GCN-IR-NEXT: s_mov_b32 s13, 0
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[10:11], 63
 ; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[18:19]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[18:19], s[10:11], 63
-; GCN-IR-NEXT: s_xor_b64 s[20:21], s[14:15], -1
-; GCN-IR-NEXT: s_and_b64 s[18:19], s[20:21], s[18:19]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[18:19]
+; GCN-IR-NEXT: s_or_b64 s[18:19], s[14:15], s[20:21]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[18:19]
+; GCN-IR-NEXT: s_mov_b32 s13, 0
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB9_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s14, s10, 1
@@ -1376,20 +1373,19 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[6:7]
 ; GCN-IR-NEXT: s_sub_u32 s4, s2, s6
 ; GCN-IR-NEXT: s_subb_u32 s5, s3, s6
-; GCN-IR-NEXT: s_flbit_i32_b32 s2, s4
-; GCN-IR-NEXT: s_add_i32 s2, s2, 32
-; GCN-IR-NEXT: s_flbit_i32_b32 s3, s5
-; GCN-IR-NEXT: s_min_u32 s8, s2, s3
+; GCN-IR-NEXT: s_flbit_i32_b32 s6, s4
+; GCN-IR-NEXT: s_add_i32 s6, s6, 32
+; GCN-IR-NEXT: s_flbit_i32_b32 s7, s5
+; GCN-IR-NEXT: s_min_u32 s8, s6, s7
 ; GCN-IR-NEXT: s_add_u32 s6, s8, 0xffffffc5
 ; GCN-IR-NEXT: s_addc_u32 s7, 0, -1
-; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[4:5], 0
-; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[6:7], 63
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[2:3], s[4:5], 0
+; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63
+; GCN-IR-NEXT: s_or_b64 s[10:11], s[2:3], s[10:11]
+; GCN-IR-NEXT: s_or_b64 s[2:3], s[10:11], s[12:13]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[2:3]
 ; GCN-IR-NEXT: s_mov_b64 s[2:3], 0
-; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[12:13], s[6:7], 63
-; GCN-IR-NEXT: s_xor_b64 s[14:15], s[10:11], -1
-; GCN-IR-NEXT: s_and_b64 s[12:13], s[14:15], s[12:13]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13]
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s10, s6, 1
@@ -1993,7 +1989,7 @@ define amdgpu_kernel void @s_test_srem24_k_num_i64(i64 addrspace(1)* %out, i64 %
 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v1, v1
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v0|
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v1, v0
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1
 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4
 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, 24, v0
 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24
@@ -2055,7 +2051,7 @@ define amdgpu_kernel void @s_test_srem24_k_den_i64(i64 addrspace(1)* %out, i64 %
 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s4
 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GCN-IR-NEXT: s_movk_i32 s3, 0x5b7f
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2
 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s3
 ; GCN-IR-NEXT: s_mov_b32 s4, s0
 ; GCN-IR-NEXT: s_mov_b32 s5, s1
diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll
index 8618438..6ff04fdc 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll
@@ -141,12 +141,11 @@ define amdgpu_kernel void @s_test_udiv_i64(i64 addrspace(1)* %out, i64 %x, i64 %
 ; GCN-IR-NEXT: s_sub_u32 s8, s10, s14
 ; GCN-IR-NEXT: s_subb_u32 s9, 0, 0
 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[8:9], 63
-; GCN-IR-NEXT: s_mov_b32 s11, 0
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[8:9], 63
 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[16:17], s[8:9], 63
-; GCN-IR-NEXT: s_xor_b64 s[18:19], s[12:13], -1
-; GCN-IR-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17]
+; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17]
+; GCN-IR-NEXT: s_mov_b32 s11, 0
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s12, s8, 1
@@ -811,12 +810,11 @@ define amdgpu_kernel void @s_test_udiv24_i48(i48 addrspace(1)* %out, i48 %x, i48
 ; GCN-IR-NEXT: s_sub_u32 s6, s10, s14
 ; GCN-IR-NEXT: s_subb_u32 s7, 0, 0
 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[6:7], 63
-; GCN-IR-NEXT: s_mov_b32 s11, 0
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[6:7], 63
 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[16:17], s[6:7], 63
-; GCN-IR-NEXT: s_xor_b64 s[18:19], s[12:13], -1
-; GCN-IR-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17]
+; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17]
+; GCN-IR-NEXT: s_mov_b32 s11, 0
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s12, s6, 1
@@ -990,20 +988,19 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-IR: ; %bb.0: ; %_udiv-special-cases
 ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-IR-NEXT: s_flbit_i32_b32 s4, s2
-; GCN-IR-NEXT: s_flbit_i32_b32 s5, s3
-; GCN-IR-NEXT: s_add_i32 s4, s4, 32
-; GCN-IR-NEXT: s_min_u32 s8, s4, s5
+; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2
+; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3
+; GCN-IR-NEXT: s_add_i32 s6, s6, 32
+; GCN-IR-NEXT: s_min_u32 s8, s6, s7
 ; GCN-IR-NEXT: s_add_u32 s6, s8, 0xffffffc5
 ; GCN-IR-NEXT: s_addc_u32 s7, 0, -1
-; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[2:3], 0
-; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[6:7], 63
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0
+; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63
+; GCN-IR-NEXT: s_or_b64 s[10:11], s[4:5], s[10:11]
+; GCN-IR-NEXT: s_or_b64 s[4:5], s[10:11], s[12:13]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5]
 ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0
-; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[12:13], s[6:7], 63
-; GCN-IR-NEXT: s_xor_b64 s[14:15], s[10:11], -1
-; GCN-IR-NEXT: s_and_b64 s[12:13], s[14:15], s[12:13]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13]
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s10, s6, 1
@@ -1439,20 +1436,19 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-IR: ; %bb.0: ; %_udiv-special-cases
 ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-IR-NEXT: s_flbit_i32_b32 s4, s2
-; GCN-IR-NEXT: s_flbit_i32_b32 s5, s3
-; GCN-IR-NEXT: s_add_i32 s4, s4, 32
-; GCN-IR-NEXT: s_min_u32 s10, s4, s5
+; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2
+; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3
+; GCN-IR-NEXT: s_add_i32 s6, s6, 32
+; GCN-IR-NEXT: s_min_u32 s10, s6, s7
 ; GCN-IR-NEXT: s_sub_u32 s6, 59, s10
 ; GCN-IR-NEXT: s_subb_u32 s7, 0, 0
-; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[2:3], 0
-; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[6:7], 63
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0
+; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[8:9], s[6:7], 63
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63
+; GCN-IR-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9]
+; GCN-IR-NEXT: s_or_b64 s[4:5], s[8:9], s[12:13]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5]
 ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0
-; GCN-IR-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[12:13], s[6:7], 63
-; GCN-IR-NEXT: s_xor_b64 s[14:15], s[8:9], -1
-; GCN-IR-NEXT: s_and_b64 s[12:13], s[14:15], s[12:13]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13]
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB11_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s8, s6, 1
diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll
index e0a6fd9..dbec6f1 100644
--- a/llvm/test/CodeGen/AMDGPU/urem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/urem64.ll
@@ -140,12 +140,11 @@ define amdgpu_kernel void @s_test_urem_i64(i64 addrspace(1)* %out, i64 %x, i64 %
 ; GCN-IR-NEXT: s_sub_u32 s8, s10, s14
 ; GCN-IR-NEXT: s_subb_u32 s9, 0, 0
 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[8:9], 63
-; GCN-IR-NEXT: s_mov_b32 s11, 0
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[8:9], 63
 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[16:17], s[8:9], 63
-; GCN-IR-NEXT: s_xor_b64 s[18:19], s[12:13], -1
-; GCN-IR-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17]
+; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17]
+; GCN-IR-NEXT: s_mov_b32 s11, 0
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s12, s8, 1
@@ -202,8 +201,8 @@ define amdgpu_kernel void @s_test_urem_i64(i64 addrspace(1)* %out, i64 %x, i64 %
 ; GCN-IR-NEXT: v_mul_lo_u32 v3, s5, v0
 ; GCN-IR-NEXT: v_mul_lo_u32 v0, s4, v0
 ; GCN-IR-NEXT: s_mov_b32 s11, 0xf000
-; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v2, v1
-; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v2
+; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v3, v1
 ; GCN-IR-NEXT: v_mov_b32_e32 v2, s3
 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
 ; GCN-IR-NEXT: s_mov_b32 s10, -1
@@ -817,20 +816,19 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-IR: ; %bb.0: ; %_udiv-special-cases
 ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-IR-NEXT: s_flbit_i32_b32 s4, s2
-; GCN-IR-NEXT: s_flbit_i32_b32 s5, s3
-; GCN-IR-NEXT: s_add_i32 s4, s4, 32
-; GCN-IR-NEXT: s_min_u32 s8, s4, s5
+; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2
+; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3
+; GCN-IR-NEXT: s_add_i32 s6, s6, 32
+; GCN-IR-NEXT: s_min_u32 s8, s6, s7
 ; GCN-IR-NEXT: s_add_u32 s6, s8, 0xffffffc5
 ; GCN-IR-NEXT: s_addc_u32 s7, 0, -1
-; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[2:3], 0
-; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[6:7], 63
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0
+; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63
+; GCN-IR-NEXT: s_or_b64 s[10:11], s[4:5], s[10:11]
+; GCN-IR-NEXT: s_or_b64 s[4:5], s[10:11], s[12:13]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5]
 ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0
-; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[12:13], s[6:7], 63
-; GCN-IR-NEXT: s_xor_b64 s[14:15], s[10:11], -1
-; GCN-IR-NEXT: s_and_b64 s[12:13], s[14:15], s[12:13]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13]
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB6_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s10, s6, 1
@@ -1005,20 +1003,19 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-IR: ; %bb.0: ; %_udiv-special-cases
 ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-IR-NEXT: s_flbit_i32_b32 s4, s2
-; GCN-IR-NEXT: s_flbit_i32_b32 s5, s3
-; GCN-IR-NEXT: s_add_i32 s4, s4, 32
-; GCN-IR-NEXT: s_min_u32 s8, s4, s5
+; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2
+; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3
+; GCN-IR-NEXT: s_add_i32 s6, s6, 32
+; GCN-IR-NEXT: s_min_u32 s8, s6, s7
 ; GCN-IR-NEXT: s_sub_u32 s6, 59, s8
 ; GCN-IR-NEXT: s_subb_u32 s7, 0, 0
-; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[2:3], 0
-; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[6:7], 63
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0
+; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63
+; GCN-IR-NEXT: s_or_b64 s[10:11], s[4:5], s[10:11]
+; GCN-IR-NEXT: s_or_b64 s[4:5], s[10:11], s[12:13]
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5]
 ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0
-; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[12:13], s[6:7], 63
-; GCN-IR-NEXT: s_xor_b64 s[14:15], s[10:11], -1
-; GCN-IR-NEXT: s_and_b64 s[12:13], s[14:15], s[12:13]
-; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13]
 ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_5
 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
 ; GCN-IR-NEXT: s_add_u32 s10, s6, 1
@@ -1071,7 +1068,7 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(i64 addrspace(1)* %out, i64 %x)
 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, 24
 ; GCN-IR-NEXT: s_mov_b32 s7, 0xf000
 ; GCN-IR-NEXT: s_mov_b32 s6, -1
-; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v2
+; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v2, v1
 ; GCN-IR-NEXT: v_mov_b32_e32 v2, s3
 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
 ; GCN-IR-NEXT: s_mov_b32 s4, s0
diff --git a/llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll b/llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll
index 27ab1e8..ae23e42 100644
--- a/llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll
+++ b/llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll
@@ -15,10 +15,10 @@ define void @sdiv129(i129* %ptr, i129* %out) nounwind {
 ; CHECK-NEXT: [[TMP7:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP2]], i1 true)
 ; CHECK-NEXT: [[TMP8:%.*]] = sub i129 [[TMP6]], [[TMP7]]
 ; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i129 [[TMP8]], 128
-; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP5]], [[TMP9]]
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP5]], i1 true, i1 [[TMP9]]
 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i129 [[TMP8]], 128
 ; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP10]], i129 0, i129 [[TMP2]]
-; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP10]], i1 true, i1 [[TMP11]]
 ; CHECK-NEXT: br i1 [[TMP13]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
 ; CHECK: udiv-loop-exit:
 ; CHECK-NEXT: [[TMP14:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP29:%.*]], [[UDIV_DO_WHILE:%.*]] ]
diff --git a/llvm/test/Transforms/ExpandLargeDivRem/srem129.ll b/llvm/test/Transforms/ExpandLargeDivRem/srem129.ll
index 08df750..74aef11 100644
--- a/llvm/test/Transforms/ExpandLargeDivRem/srem129.ll
+++ b/llvm/test/Transforms/ExpandLargeDivRem/srem129.ll
@@ -14,10 +14,10 @@ define void @test(i129* %ptr, i129* %out) nounwind {
 ; CHECK-NEXT: [[TMP6:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP2]], i1 true)
 ; CHECK-NEXT: [[TMP7:%.*]] = sub i129 [[TMP5]], [[TMP6]]
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i129 [[TMP7]], 128
-; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP4]], [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP4]], i1 true, i1 [[TMP8]]
 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i129 [[TMP7]], 128
 ; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], i129 0, i129 [[TMP2]]
-; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP9]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP9]], i1 true, i1 [[TMP10]]
 ; CHECK-NEXT: br i1 [[TMP12]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
 ; CHECK: udiv-loop-exit:
 ; CHECK-NEXT: [[TMP13:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP28:%.*]], [[UDIV_DO_WHILE:%.*]] ]
diff --git a/llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll b/llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll
index 31705a9..6908e91 100644
--- a/llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll
+++ b/llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll
@@ -11,10 +11,10 @@ define void @test(i129* %ptr, i129* %out) nounwind {
 ; CHECK-NEXT: [[TMP3:%.*]] = call i129 @llvm.ctlz.i129(i129 [[A]], i1 true)
 ; CHECK-NEXT: [[TMP4:%.*]] = sub i129 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i129 [[TMP4]], 128
-; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP1]], [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP1]], i1 true, i1 [[TMP5]]
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i129 [[TMP4]], 128
 ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], i129 0, i129 [[A]]
-; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP6]], i1 true, i1 [[TMP7]]
 ; CHECK-NEXT: br i1 [[TMP9]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
 ; CHECK: udiv-loop-exit:
 ; CHECK-NEXT: [[TMP10:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP25:%.*]], [[UDIV_DO_WHILE:%.*]] ]
diff --git a/llvm/test/Transforms/ExpandLargeDivRem/urem129.ll b/llvm/test/Transforms/ExpandLargeDivRem/urem129.ll
index 4e91eef..3342b24 100644
--- a/llvm/test/Transforms/ExpandLargeDivRem/urem129.ll
+++ b/llvm/test/Transforms/ExpandLargeDivRem/urem129.ll
@@ -11,10 +11,10 @@ define void @test(i129* %ptr, i129* %out) nounwind {
 ; CHECK-NEXT: [[TMP3:%.*]] = call i129 @llvm.ctlz.i129(i129 [[A]], i1 true)
 ; CHECK-NEXT: [[TMP4:%.*]] = sub i129 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i129 [[TMP4]], 128
-; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP1]], [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP1]], i1 true, i1 [[TMP5]]
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i129 [[TMP4]], 128
 ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], i129 0, i129 [[A]]
-; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP6]], i1 true, i1 [[TMP7]]
 ; CHECK-NEXT: br i1 [[TMP9]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
 ; CHECK: udiv-loop-exit:
 ; CHECK-NEXT: [[TMP10:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP25:%.*]], [[UDIV_DO_WHILE:%.*]] ]
-- 
2.7.4
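Note (not part of the patch): the reason a plain 'or' is unsound here is that
llvm.ctlz called with 'i1 true' (is_zero_poison) returns poison when the
dividend is zero, and a bitwise 'or' yields poison whenever either operand is
poison, even if the other operand is true. A 'select' whose condition is true
never observes its false arm, so the already-computed zero check can shield
the possibly-poison comparison. A minimal IR sketch of the pattern (function
and value names are hypothetical, not taken from the patch):

  define i1 @early_exit_sketch(i32 %dividend, i1 %either_op_zero) {
    ; Poison when %dividend == 0, because is_zero_poison is set.
    %tmp1 = call i32 @llvm.ctlz.i32(i32 %dividend, i1 true)
    %sr = sub i32 32, %tmp1       ; poison propagates through the sub
    %big = icmp ugt i32 %sr, 31   ; and through the compare
    ; Unsound: 'or i1 %either_op_zero, %big' is poison whenever %big is,
    ; even with %either_op_zero == true.
    ; Sound: when the condition is true, the select returns true without
    ; depending on the possibly-poison %big.
    %ret0 = select i1 %either_op_zero, i1 true, i1 %big
    ret i1 %ret0
  }

  declare i32 @llvm.ctlz.i32(i32, i1)

This is exactly what the CreateOr -> CreateLogicalOr change above does:
IRBuilder's CreateLogicalOr(A, B) emits 'select i1 A, i1 true, i1 B', which is
why the ExpandLargeDivRem CHECK lines now match selects instead of ors.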