From 1bb293f6582b76adc54aa54ae442d4287ef778a0 Mon Sep 17 00:00:00 2001
From: jeff
Date: Mon, 19 Sep 2022 18:08:55 +0000
Subject: [PATCH] [AMDGPU] [DAGCombiner] Precommit test for D133584

Change-Id: I488ac9b23718f8d0b28db034c4cc455ae736e785
---
 llvm/test/CodeGen/AMDGPU/combine-vload-extract.ll | 81 +++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/combine-vload-extract.ll

diff --git a/llvm/test/CodeGen/AMDGPU/combine-vload-extract.ll b/llvm/test/CodeGen/AMDGPU/combine-vload-extract.ll
new file mode 100644
index 0000000..e977a71
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/combine-vload-extract.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_kernel void @vectorLoadCombine(<4 x i8>* %in, i32* %out) {
+; GCN-LABEL: vectorLoadCombine:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: v_mov_b32_e32 v1, s1
+; GCN-NEXT: flat_load_dword v2, v[0:1]
+; GCN-NEXT: s_mov_b32 s0, 0x6050400
+; GCN-NEXT: v_mov_b32_e32 v0, s2
+; GCN-NEXT: v_mov_b32_e32 v1, s3
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_bfe_u32 v3, v2, 8, 8
+; GCN-NEXT: v_and_b32_e32 v4, 0xff0000, v2
+; GCN-NEXT: v_perm_b32 v3, v3, v2, s0
+; GCN-NEXT: v_and_b32_e32 v2, 0xff000000, v2
+; GCN-NEXT: v_or3_b32 v2, v3, v4, v2
+; GCN-NEXT: flat_store_dword v[0:1], v2
+; GCN-NEXT: s_endpgm
+entry:
+  %0 = load <4 x i8>, <4 x i8>* %in, align 4 ; one 4-byte-aligned load of the whole vector
+  %1 = extractelement <4 x i8> %0, i32 0
+  %2 = extractelement <4 x i8> %0, i32 1
+  %3 = extractelement <4 x i8> %0, i32 2
+  %4 = extractelement <4 x i8> %0, i32 3
+  %zext0 = zext i8 %1 to i32 ; element 0 -> bits [7:0]
+  %zext1 = zext i8 %2 to i32
+  %shift1 = shl nuw nsw i32 %zext1, 8 ; element 1 -> bits [15:8]
+  %insert1 = or i32 %shift1, %zext0
+  %zext2 = zext i8 %3 to i32
+  %shift2 = shl nuw nsw i32 %zext2, 16 ; element 2 -> bits [23:16]
+  %insert2 = or i32 %insert1, %shift2
+  %zext3 = zext i8 %4 to i32
+  %shift3 = shl nuw i32 %zext3, 24 ; element 3 -> bits [31:24]; no nsw because bit 31 may be set
+  %insert3 = or i32 %insert2, %shift3 ; all four elements packed in their original index order
+  store i32 %insert3, i32* %out
+  ret void
+}
+
+define amdgpu_kernel void @vectorLoadShuffle(<4 x i8>* %in, i32* %out) {
+; GCN-LABEL: vectorLoadShuffle:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: v_mov_b32_e32 v1, s1
+; GCN-NEXT: flat_load_dword v2, v[0:1]
+; GCN-NEXT: s_mov_b32 s0, 0x6050400
+; GCN-NEXT: v_mov_b32_e32 v0, s2
+; GCN-NEXT: v_mov_b32_e32 v1, s3
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_bfe_u32 v3, v2, 16, 8
+; GCN-NEXT: v_lshlrev_b32_e32 v4, 8, v2
+; GCN-NEXT: v_perm_b32 v3, v3, v2, s0
+; GCN-NEXT: v_and_b32_e32 v4, 0xff0000, v4
+; GCN-NEXT: v_and_b32_e32 v2, 0xff000000, v2
+; GCN-NEXT: v_or3_b32 v2, v3, v4, v2
+; GCN-NEXT: flat_store_dword v[0:1], v2
+; GCN-NEXT: s_endpgm
+entry:
+  %0 = load <4 x i8>, <4 x i8>* %in, align 4 ; one 4-byte-aligned load of the whole vector
+  %1 = extractelement <4 x i8> %0, i32 0
+  %2 = extractelement <4 x i8> %0, i32 1
+  %3 = extractelement <4 x i8> %0, i32 2
+  %4 = extractelement <4 x i8> %0, i32 3
+  %zext0 = zext i8 %1 to i32 ; element 0 -> bits [7:0]
+  %zext1 = zext i8 %3 to i32 ; note: element 2 (not element 1) is used here
+  %shift1 = shl nuw nsw i32 %zext1, 8 ; element 2 -> bits [15:8]
+  %insert1 = or i32 %shift1, %zext0
+  %zext2 = zext i8 %2 to i32 ; note: element 1 (not element 2) is used here
+  %shift2 = shl nuw nsw i32 %zext2, 16 ; element 1 -> bits [23:16]
+  %insert2 = or i32 %insert1, %shift2
+  %zext3 = zext i8 %4 to i32
+  %shift3 = shl nuw i32 %zext3, 24 ; element 3 -> bits [31:24]; no nsw because bit 31 may be set
+  %insert3 = or i32 %insert2, %shift3 ; packed with elements 1 and 2 swapped relative to index order
+  store i32 %insert3, i32* %out
+  ret void
+}
-- 
2.7.4