From 3c1273d7378e1d51abf9b43c764ccb9828e4a26a Mon Sep 17 00:00:00 2001 From: Tim Corringham Date: Tue, 28 Jul 2020 19:01:03 +0100 Subject: [PATCH] [AMDGPU] Add amdgpu specific loop threshold metadata Add new loop metadata amdgpu.loop.unroll.threshold to allow the initial AMDGPU specific unroll threshold value to be specified on a loop by loop basis. The intention is to be able to allow more nuanced hints, e.g. specifying a low threshold value to indicate that a loop may be unrolled if cheap enough rather than using the all or nothing llvm.loop.unroll.disable metadata. Differential Revision: https://reviews.llvm.org/D84779 --- .../Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 20 ++++ .../LoopUnroll/AMDGPU/unroll-threshold.ll | 113 +++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-threshold.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 78dc20b..31585ed 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -117,6 +117,26 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, const unsigned MaxAlloca = (256 - 16) * 4; unsigned ThresholdPrivate = UnrollThresholdPrivate; unsigned ThresholdLocal = UnrollThresholdLocal; + + // If this loop has the amdgpu.loop.unroll.threshold metadata we will use the + // provided threshold value as the default for Threshold + if (MDNode *LoopUnrollThreshold = + findOptionMDForLoop(L, "amdgpu.loop.unroll.threshold")) { + if (LoopUnrollThreshold->getNumOperands() == 2) { + ConstantInt *MetaThresholdValue = mdconst::extract_or_null( + LoopUnrollThreshold->getOperand(1)); + if (MetaThresholdValue) { + // We will also use the supplied value for PartialThreshold for now. + // We may introduce additional metadata if it becomes necessary in the + // future. 
+ UP.Threshold = MetaThresholdValue->getSExtValue(); + UP.PartialThreshold = UP.Threshold; + ThresholdPrivate = std::min(ThresholdPrivate, UP.Threshold); + ThresholdLocal = std::min(ThresholdLocal, UP.Threshold); + } + } + } + unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal); for (const BasicBlock *BB : L->getBlocks()) { const DataLayout &DL = BB->getModule()->getDataLayout(); diff --git a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-threshold.ll b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-threshold.ll new file mode 100644 index 0000000..5c551af --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-threshold.ll @@ -0,0 +1,113 @@ +; RUN: opt < %s -S -mtriple=amdgcn-- -loop-unroll | FileCheck %s + +; Check the handling of amdgpu.loop.unroll.threshold metadata which can be used to +; set the default threshold for a loop. This metadata overrides both the AMDGPU +; default, and any value specified by the amdgpu-unroll-threshold function attribute +; (which sets a threshold for all loops in the function). 
+ +; Check that the loop in unroll_default is not fully unrolled using the default +; unroll threshold +; CHECK-LABEL: @unroll_default +; CHECK: entry: +; CHECK: br i1 %cmp +; CHECK: ret void + +@in = internal unnamed_addr global i32* null, align 8 +@out = internal unnamed_addr global i32* null, align 8 + +define void @unroll_default() { +entry: + br label %do.body + +do.body: ; preds = %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ] + %v1 = load i64, i64* bitcast (i32** @in to i64*), align 8 + store i64 %v1, i64* bitcast (i32** @out to i64*), align 8 + %inc = add nsw i32 %i.0, 1 + %cmp = icmp slt i32 %inc, 100 + br i1 %cmp, label %do.body, label %do.end + +do.end: ; preds = %do.body + ret void +} + +; Check that the same loop in unroll_full is fully unrolled when the default +; unroll threshold is increased by use of the amdgpu.loop.unroll.threshold metadata +; CHECK-LABEL: @unroll_full +; CHECK: entry: +; CHECK-NOT: br i1 %cmp +; CHECK: ret void + +define void @unroll_full() { +entry: + br label %do.body + +do.body: ; preds = %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ] + %v1 = load i64, i64* bitcast (i32** @in to i64*), align 8 + store i64 %v1, i64* bitcast (i32** @out to i64*), align 8 + %inc = add nsw i32 %i.0, 1 + %cmp = icmp slt i32 %inc, 100 + br i1 %cmp, label %do.body, label %do.end, !llvm.loop !1 + +do.end: ; preds = %do.body + ret void +} + +; Check that the same loop in override_no_unroll is not unrolled when a high default +; unroll threshold specified using the amdgpu-unroll-threshold function attribute +; is overridden by a low threshold using the amdgpu.loop.unroll.threshold metadata + +; CHECK-LABEL: @override_no_unroll +; CHECK: entry: +; CHECK: br i1 %cmp +; CHECK: ret void + +define void @override_no_unroll() #0 { +entry: + br label %do.body + +do.body: ; preds = %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ] + %v1 = load i64, i64* bitcast (i32** @in to i64*), align 8 + store i64 %v1, i64* bitcast (i32** @out 
to i64*), align 8 + %inc = add nsw i32 %i.0, 1 + %cmp = icmp slt i32 %inc, 100 + br i1 %cmp, label %do.body, label %do.end, !llvm.loop !3 + +do.end: ; preds = %do.body + ret void +} + +; Check that the same loop in override_unroll is fully unrolled when a low default +; unroll threshold specified using the amdgpu-unroll-threshold function attribute +; is overridden by a high threshold using the amdgpu.loop.unroll.threshold metadata + +; CHECK-LABEL: @override_unroll +; CHECK: entry: +; CHECK-NOT: br i1 %cmp +; CHECK: ret void + +define void @override_unroll() #1 { +entry: + br label %do.body + +do.body: ; preds = %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ] + %v1 = load i64, i64* bitcast (i32** @in to i64*), align 8 + store i64 %v1, i64* bitcast (i32** @out to i64*), align 8 + %inc = add nsw i32 %i.0, 1 + %cmp = icmp slt i32 %inc, 100 + br i1 %cmp, label %do.body, label %do.end, !llvm.loop !1 + +do.end: ; preds = %do.body + ret void +} + +attributes #0 = { "amdgpu-unroll-threshold"="1000" } +attributes #1 = { "amdgpu-unroll-threshold"="100" } + +!1 = !{!1, !2} +!2 = !{!"amdgpu.loop.unroll.threshold", i32 1000} +!3 = !{!3, !4} +!4 = !{!"amdgpu.loop.unroll.threshold", i32 100} -- 2.7.4