From 53afdb712d3bee4030264f6168a369061de52a86 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 22 May 2023 09:50:23 +0100 Subject: [PATCH] [SLP][RISCV] Add test for folding offsets in GEP pointer chains --- .../SLPVectorizer/RISCV/getpointerschaincost.ll | 101 +++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll new file mode 100644 index 0000000..6a21760 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=riscv64 -mattr=+v -riscv-v-slp-max-vf=0 -passes=slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s +; RUN: FileCheck --input-file=%t --check-prefix=YAML %s + +; Because all of these addresses are foldable, the scalar cost should be 0 when +; computing the pointers chain cost. 
+;
+; TODO: These are currently costed as free because the indices are all constants, but we
+; should check if the constants are actually foldable
+define void @f(ptr %dest, i64 %i) {
+; CHECK-LABEL: define void @f
+; CHECK-SAME: (ptr [[DEST:%.*]], i64 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr i32, ptr [[DEST]], i32 0
+; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[P1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+; YAML: Pass: slp-vectorizer
+; YAML-NEXT: Name: StoresVectorized
+; YAML-NEXT: Function: f
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
+; YAML-NEXT: - Cost: '-2'
+; YAML-NEXT: - String: ' and with tree size '
+; YAML-NEXT: - TreeSize: '2'
+  %p1 = getelementptr i32, ptr %dest, i32 0
+  store i32 1, ptr %p1
+  %p2 = getelementptr i32, ptr %dest, i32 1
+  store i32 1, ptr %p2
+  %p3 = getelementptr i32, ptr %dest, i32 2
+  store i32 1, ptr %p3
+  %p4 = getelementptr i32, ptr %dest, i32 3
+  store i32 1, ptr %p4
+  ret void
+}
+
+; When computing the scalar pointers chain cost here, there is a cost of 1 for
+; the base pointer, and the rest can be folded in, so the scalar cost should be
+; 1.
+;
+; TODO: These are currently costed as free because the indices are all constants, but we
+; should check if the constants are actually foldable
+define void @g(ptr %dest, i64 %i) {
+; CHECK-LABEL: define void @g
+; CHECK-SAME: (ptr [[DEST:%.*]], i64 [[I:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr i32, ptr [[DEST]], i32 2048
+; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[P1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+; YAML: Pass: slp-vectorizer
+; YAML-NEXT: Name: StoresVectorized
+; YAML-NEXT: Function: g
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
+; YAML-NEXT: - Cost: '-2'
+; YAML-NEXT: - String: ' and with tree size '
+; YAML-NEXT: - TreeSize: '2'
+  %p1 = getelementptr i32, ptr %dest, i32 2048
+  store i32 1, ptr %p1
+  %p2 = getelementptr i32, ptr %dest, i32 2049
+  store i32 1, ptr %p2
+  %p3 = getelementptr i32, ptr %dest, i32 2050
+  store i32 1, ptr %p3
+  %p4 = getelementptr i32, ptr %dest, i32 2051
+  store i32 1, ptr %p4
+  ret void
+}
+
+; FIXME: When computing the scalar pointers chain cost here, there is a cost of
+; 1 for the base pointer, and the rest can be folded in, so the scalar cost
+; should be 1.
+define void @h(ptr %dest, i32 %i) {
+; CHECK-LABEL: define void @h
+; CHECK-SAME: (ptr [[DEST:%.*]], i32 [[I:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr [4 x i32], ptr [[DEST]], i32 [[I]], i32 0
+; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[P1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+; YAML: Pass: slp-vectorizer
+; YAML-NEXT: Name: StoresVectorized
+; YAML-NEXT: Function: h
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
+; YAML-NEXT: - Cost: '-5'
+; YAML-NEXT: - String: ' and with tree size '
+; YAML-NEXT: - TreeSize: '2'
+  %p1 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 0
+  store i32 1, ptr %p1
+  %p2 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 1
+  store i32 1, ptr %p2
+  %p3 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 2
+  store i32 1, ptr %p3
+  %p4 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 3
+  store i32 1, ptr %p4
+  ret void
+}
-- 
2.7.4