From 195c22f2733cf923b932412f0fe212f4ef397d2c Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Thu, 24 Sep 2020 14:02:53 +0100 Subject: [PATCH] [ARM] Change VPT state assertion Just because we haven't encountered an instruction setting the VPR, it doesn't mean we can't create a VPT block - the VPR may be a live-in. Differential Revision: https://reviews.llvm.org/D88224 --- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 3 +- .../LowOverheadLoops/begin-vpt-without-inst.mir | 117 +++++++++++++++++++++ 2 files changed, 119 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/begin-vpt-without-inst.mir diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 636359d8..72e772e 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -187,7 +187,8 @@ namespace { std::unique_ptr> PredicatedInsts; static void CreateVPTBlock(MachineInstr *MI) { - assert(CurrentPredicates.size() && "Can't begin VPT without predicate"); + assert((CurrentPredicates.size() || MI->getParent()->isLiveIn(ARM::VPR)) + && "Can't begin VPT without predicate"); Blocks.emplace_back(MI); // The execution of MI is predicated upon the current set of instructions // that are AND'ed together to form the VPR predicate value. 
In the case diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/begin-vpt-without-inst.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/begin-vpt-without-inst.mir new file mode 100644 index 0000000..1930aca --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/begin-vpt-without-inst.mir @@ -0,0 +1,117 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s + +--- | + @arr = external dso_local local_unnamed_addr global [0 x i32], align 4 + + define dso_local arm_aapcs_vfpcc void @foo(i32 %i) { + entry: + %tobool.not11 = icmp eq i32 %i, 0 + br i1 %tobool.not11, label %for.end5, label %vector.ph.preheader + + vector.ph.preheader: ; preds = %entry + %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 3) + br label %vector.ph + + vector.ph: ; preds = %vector.ph.preheader, %vector.ph + %i.addr.012 = phi i32 [ %math, %vector.ph ], [ %i, %vector.ph.preheader ] + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> , <4 x i32>* bitcast ([0 x i32]* @arr to <4 x i32>*), i32 4, <4 x i1> %active.lane.mask) + %0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %i.addr.012, i32 1) + %math = extractvalue { i32, i1 } %0, 0 + %ov = extractvalue { i32, i1 } %0, 1 + br i1 %ov, label %for.end5, label %vector.ph + + for.end5: ; preds = %vector.ph, %entry + ret void + } + + declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) + declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) + +... 
+--- +name: foo +alignment: 8 +tracksRegLiveness: true +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } +frameInfo: + maxAlignment: 1 + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +constants: + - id: 0 + value: '<4 x i32> ' + alignment: 8 + isTargetSpecific: false +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: foo + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.3(0x30000000), %bb.1(0x50000000) + ; CHECK: liveins: $r0 + ; CHECK: tCBZ $r0, %bb.3 + ; CHECK: bb.1.vector.ph.preheader: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0 + ; CHECK: renamable $r1 = tLEApcrel %const.0, 14 /* CC::al */, $noreg + ; CHECK: renamable $q0 = MVE_VMOVimmi32 3, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $q1 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load 16 from constant-pool, align 8) + ; CHECK: $r1 = t2MOVi16 target-flags(arm-lo16) @arr, 14 /* CC::al */, $noreg + ; CHECK: $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @arr, 14 /* CC::al */, $noreg + ; CHECK: renamable $vpr = MVE_VCMPu32 killed renamable $q0, killed renamable $q1, 8, 0, $noreg + ; CHECK: renamable $q0 = MVE_VMOVimmi32 2, 0, $noreg, undef renamable $q0 + ; CHECK: bb.2.vector.ph: + ; CHECK: successors: %bb.3(0x04000000), %bb.2(0x7c000000) + ; CHECK: liveins: $vpr, $q0, $r0, $r1 + ; CHECK: renamable $r0, $cpsr = tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: MVE_VSTRWU32 renamable $q0, renamable $r1, 0, 1, renamable $vpr :: (store 16 into `<4 x i32>* bitcast ([0 x i32]* @arr to <4 x i32>*)`, align 4) + ; CHECK: tBcc %bb.2, 3 /* CC::lo */, killed $cpsr + ; CHECK: bb.3.for.end5: + ; CHECK: tBX_RET 14 /* CC::al */, $noreg + ; CHECK: bb.4 (align 8): + ; CHECK: CONSTPOOL_ENTRY 0, %const.0, 16 + bb.0.entry: + successors: %bb.3(0x30000000), %bb.1(0x50000000) + liveins: $r0 + + tCBZ 
$r0, %bb.3 + + bb.1.vector.ph.preheader: + successors: %bb.2(0x80000000) + liveins: $r0 + + renamable $r1 = tLEApcrel %const.0, 14 /* CC::al */, $noreg + renamable $q0 = MVE_VMOVimmi32 3, 0, $noreg, undef renamable $q0 + renamable $q1 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load 16 from constant-pool, align 8) + $r1 = t2MOVi16 target-flags(arm-lo16) @arr, 14 /* CC::al */, $noreg + $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @arr, 14 /* CC::al */, $noreg + renamable $vpr = MVE_VCMPu32 killed renamable $q0, killed renamable $q1, 8, 0, $noreg + renamable $q0 = MVE_VMOVimmi32 2, 0, $noreg, undef renamable $q0 + + bb.2.vector.ph: + successors: %bb.3(0x04000000), %bb.2(0x7c000000) + liveins: $vpr, $q0, $r0, $r1 + + renamable $r0, $cpsr = tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg + MVE_VPST 8, implicit $vpr + MVE_VSTRWU32 renamable $q0, renamable $r1, 0, 1, renamable $vpr :: (store 16 into `<4 x i32>* bitcast ([0 x i32]* @arr to <4 x i32>*)`, align 4) + tBcc %bb.2, 3 /* CC::lo */, killed $cpsr + + bb.3.for.end5: + tBX_RET 14 /* CC::al */, $noreg + + bb.4 (align 8): + CONSTPOOL_ENTRY 0, %const.0, 16 + +... -- 2.7.4