From: Kang Zhang Date: Sat, 28 Dec 2019 09:04:54 +0000 (+0000) Subject: [PowerPC] Modify the hasSideEffects of some VSX instructions from 1 to 0 X-Git-Tag: llvmorg-11-init~1285 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d1b51c5de7a0b7a7d81c3b520614a139eb0160d2;p=platform%2Fupstream%2Fllvm.git [PowerPC] Modify the hasSideEffects of some VSX instructions from 1 to 0 Summary: If we don't set the value of the hasSideEffects bit in our td file, `llvm-tblgen` will set it to true for those instructions which have no match pattern. The 6 instructions below don't set the hasSideEffects flag and don't have a match pattern, so their hasSideEffects flag will be set to true by llvm-tblgen. But in fact these instructions don't modify any special register and don't have any other side effects, so they shouldn't be marked as having side effects. This patch changes the hasSideEffects of the instructions below from 1 to 0. ``` VEXTUHLX VEXTUHRX VEXTUWLX VEXTUWRX VSPLTBs VSPLTHs ``` Reviewed By: jsji Differential Revision: https://reviews.llvm.org/D71391 --- diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index 21a432e..1cdb492 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -711,7 +711,7 @@ def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), "vspltw $vD, $vB, $UIMM", IIC_VecPerm, [(set v16i8:$vD, (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>; -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, hasSideEffects = 0 in { def VSPLTBs : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB), "vspltb $vD, $vB, $UIMM", IIC_VecPerm, []>; def VSPLTHs : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB), @@ -1378,12 +1378,14 @@ def VEXTRACTUW : VX1_VT5_UIM5_VB5<653, "vextractuw", []>; def VEXTRACTD : VX1_VT5_UIM5_VB5<717, "vextractd" , []>; // Vector Extract Unsigned Byte/Halfword/Word Left/Right-Indexed +let hasSideEffects = 0 in { def VEXTUBLX : 
VX1_RT5_RA5_VB5<1549, "vextublx", []>; def VEXTUBRX : VX1_RT5_RA5_VB5<1805, "vextubrx", []>; def VEXTUHLX : VX1_RT5_RA5_VB5<1613, "vextuhlx", []>; def VEXTUHRX : VX1_RT5_RA5_VB5<1869, "vextuhrx", []>; def VEXTUWLX : VX1_RT5_RA5_VB5<1677, "vextuwlx", []>; def VEXTUWRX : VX1_RT5_RA5_VB5<1933, "vextuwrx", []>; +} // Vector Insert Element Instructions def VINSERTB : VXForm_1<781, (outs vrrc:$vD), diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll index 2beebf0..60df77f 100644 --- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll +++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll @@ -33,9 +33,11 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig ; CHECK-NEXT: # ; CHECK-NEXT: lfd f0, 0(r3) ; CHECK-NEXT: xxpermdi v1, f0, f0, 2 +; CHECK-NEXT: lfdx f0, r3, r4 ; CHECK-NEXT: vperm v6, v1, v3, v4 ; CHECK-NEXT: vperm v1, v3, v1, v2 ; CHECK-NEXT: xvnegsp v1, v1 +; CHECK-NEXT: add r7, r3, r4 ; CHECK-NEXT: xvnegsp v6, v6 ; CHECK-NEXT: vabsduw v1, v1, v5 ; CHECK-NEXT: vabsduw v6, v6, v0 @@ -44,16 +46,14 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig ; CHECK-NEXT: vadduwm v1, v1, v6 ; CHECK-NEXT: xxspltw v6, v1, 2 ; CHECK-NEXT: vadduwm v1, v1, v6 -; CHECK-NEXT: vextuwrx r7, r5, v1 -; CHECK-NEXT: lfdx f0, r3, r4 -; CHECK-NEXT: add r6, r7, r6 -; CHECK-NEXT: add r7, r3, r4 -; CHECK-NEXT: xxpermdi v1, f0, f0, 2 +; CHECK-NEXT: xxpermdi v6, f0, f0, 2 +; CHECK-NEXT: vextuwrx r3, r5, v1 +; CHECK-NEXT: vperm v7, v6, v3, v4 +; CHECK-NEXT: vperm v6, v3, v6, v2 +; CHECK-NEXT: add r6, r3, r6 ; CHECK-NEXT: add r3, r7, r4 -; CHECK-NEXT: vperm v6, v3, v1, v2 -; CHECK-NEXT: vperm v1, v1, v3, v4 ; CHECK-NEXT: xvnegsp v6, v6 -; CHECK-NEXT: xvnegsp v1, v1 +; CHECK-NEXT: xvnegsp v1, v7 ; CHECK-NEXT: vabsduw v6, v6, v5 ; CHECK-NEXT: vabsduw v1, v1, v0 ; CHECK-NEXT: vadduwm v1, v1, v6 @@ -92,6 +92,7 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig ; 
P9BE-NEXT: # ; P9BE-NEXT: lfd f0, 0(r3) ; P9BE-NEXT: xxlor v1, vs0, vs0 +; P9BE-NEXT: lfdx f0, r3, r4 ; P9BE-NEXT: vperm v6, v3, v1, v4 ; P9BE-NEXT: vperm v1, v3, v1, v2 ; P9BE-NEXT: xvnegsp v1, v1 @@ -103,24 +104,23 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig ; P9BE-NEXT: vadduwm v1, v1, v6 ; P9BE-NEXT: xxspltw v6, v1, 1 ; P9BE-NEXT: vadduwm v1, v1, v6 -; P9BE-NEXT: vextuwlx r7, r5, v1 -; P9BE-NEXT: lfdx f0, r3, r4 -; P9BE-NEXT: add r6, r7, r6 +; P9BE-NEXT: xxlor v6, vs0, vs0 +; P9BE-NEXT: vperm v7, v3, v6, v4 +; P9BE-NEXT: vperm v6, v3, v6, v2 ; P9BE-NEXT: add r7, r3, r4 -; P9BE-NEXT: xxlor v1, vs0, vs0 -; P9BE-NEXT: add r3, r7, r4 -; P9BE-NEXT: vperm v6, v3, v1, v2 -; P9BE-NEXT: vperm v1, v3, v1, v4 +; P9BE-NEXT: vextuwlx r3, r5, v1 ; P9BE-NEXT: xvnegsp v6, v6 -; P9BE-NEXT: xvnegsp v1, v1 -; P9BE-NEXT: vabsduw v6, v6, v5 +; P9BE-NEXT: xvnegsp v1, v7 ; P9BE-NEXT: vabsduw v1, v1, v0 +; P9BE-NEXT: vabsduw v6, v6, v5 ; P9BE-NEXT: vadduwm v1, v1, v6 ; P9BE-NEXT: xxswapd v6, v1 +; P9BE-NEXT: add r6, r3, r6 ; P9BE-NEXT: vadduwm v1, v1, v6 ; P9BE-NEXT: xxspltw v6, v1, 1 ; P9BE-NEXT: vadduwm v1, v1, v6 ; P9BE-NEXT: vextuwlx r8, r5, v1 +; P9BE-NEXT: add r3, r7, r4 ; P9BE-NEXT: add r6, r8, r6 ; P9BE-NEXT: bdnz .LBB0_1 ; P9BE-NEXT: # %bb.2: # %for.cond.cleanup @@ -385,16 +385,16 @@ define void @test16(i16* nocapture readonly %sums, i32 signext %delta, i32 signe ; P9BE-NEXT: add r6, r3, r4 ; P9BE-NEXT: li r7, 16 ; P9BE-NEXT: lxsihzx v2, r6, r7 -; P9BE-NEXT: vsplth v2, v2, 3 ; P9BE-NEXT: lxsihzx v4, r3, r4 ; P9BE-NEXT: li r6, 0 ; P9BE-NEXT: sldi r6, r6, 48 +; P9BE-NEXT: vsplth v4, v4, 3 ; P9BE-NEXT: mtvsrd v3, r6 +; P9BE-NEXT: vsplth v2, v2, 3 ; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha ; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l -; P9BE-NEXT: vmrghh v2, v3, v2 -; P9BE-NEXT: vsplth v4, v4, 3 ; P9BE-NEXT: vmrghh v4, v3, v4 +; P9BE-NEXT: vmrghh v2, v3, v2 ; P9BE-NEXT: vsplth v3, v3, 0 ; P9BE-NEXT: vmrghw v3, v3, v4 ; P9BE-NEXT: lxvx v4, 0, r3 
@@ -441,21 +441,21 @@ if.end: ; preds = %for.body define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext %thresh) { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxsibzx v2, r3, r4 ; CHECK-NEXT: add r6, r3, r4 +; CHECK-NEXT: lxsibzx v2, r3, r4 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: mtvsrd f0, r3 ; CHECK-NEXT: li r3, 8 +; CHECK-NEXT: lxsibzx v5, r6, r3 ; CHECK-NEXT: xxswapd v3, vs0 +; CHECK-NEXT: vspltb v4, v3, 15 ; CHECK-NEXT: vspltb v2, v2, 7 -; CHECK-NEXT: lxsibzx v5, r6, r3 -; CHECK-NEXT: vspltb v5, v5, 7 ; CHECK-NEXT: vmrglb v2, v3, v2 -; CHECK-NEXT: vspltb v4, v3, 15 -; CHECK-NEXT: vmrglb v3, v3, v5 ; CHECK-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; CHECK-NEXT: vmrglh v2, v2, v4 ; CHECK-NEXT: addi r3, r3, .LCPI4_0@toc@l +; CHECK-NEXT: vspltb v5, v5, 7 +; CHECK-NEXT: vmrglh v2, v2, v4 +; CHECK-NEXT: vmrglb v3, v3, v5 ; CHECK-NEXT: vmrglw v2, v2, v4 ; CHECK-NEXT: vmrglh v3, v3, v4 ; CHECK-NEXT: vmrglw v3, v4, v3 @@ -474,17 +474,17 @@ define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext ; P9BE-NEXT: add r6, r3, r4 ; P9BE-NEXT: li r7, 8 ; P9BE-NEXT: lxsibzx v2, r6, r7 -; P9BE-NEXT: vspltb v2, v2, 7 ; P9BE-NEXT: lxsibzx v4, r3, r4 ; P9BE-NEXT: li r6, 0 ; P9BE-NEXT: sldi r6, r6, 56 +; P9BE-NEXT: vspltb v4, v4, 7 ; P9BE-NEXT: mtvsrd v3, r6 +; P9BE-NEXT: vspltb v2, v2, 7 +; P9BE-NEXT: vmrghb v4, v3, v4 ; P9BE-NEXT: vmrghb v2, v3, v2 +; P9BE-NEXT: vspltb v3, v3, 0 ; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; P9BE-NEXT: vspltb v4, v4, 7 -; P9BE-NEXT: vmrghb v4, v3, v4 -; P9BE-NEXT: vspltb v3, v3, 0 ; P9BE-NEXT: vmrghh v4, v4, v3 ; P9BE-NEXT: xxspltw v3, v3, 0 ; P9BE-NEXT: vmrghw v2, v4, v2