[GlobalISel] Precommit a test for D140965
author Diana Picus <Diana-Magda.Picus@amd.com>
Wed, 4 Jan 2023 13:15:31 +0000 (14:15 +0100)
committer Diana Picus <Diana-Magda.Picus@amd.com>
Thu, 5 Jan 2023 08:59:27 +0000 (09:59 +0100)
Add a test for CSE-ing G_BUILD_VECTOR. This will be enabled in D140965.

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-splat.mir [new file with mode: 0644]

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-splat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-splat.mir
new file mode 100644 (file)
index 0000000..4f96b06
--- /dev/null
@@ -0,0 +1,27 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck %s
+
+# Baseline for CSE-ing the G_BUILD_VECTORs of a legalized vector splat. CSE is not enabled yet, so
+# the checks still expect four identical <2 x s16> pieces. See https://reviews.llvm.org/D140965
+---
+name: build_vector_v8s16_splat
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: build_vector_v8s16_splat
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16256
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
+    ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<8 x s16>)
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = COPY $vgpr1
+    %0:_(p1) = G_MERGE_VALUES %1(s32), %2(s32)
+    %4:_(s16) = G_CONSTANT i16 16256
+    %3:_(<8 x s16>) = G_BUILD_VECTOR %4(s16), %4(s16), %4(s16), %4(s16), %4(s16), %4(s16), %4(s16), %4(s16)
+    S_NOP 0, implicit %3
+...