; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-ALIGNED %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-ALIGNED %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
; FIXME: We don't get cases where the address was an SGPR because we
; get a copy to the address register for each one.
; CI-LABEL: simple_read2_f32:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT: v_add_i32_e32 v1, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: ds_read2_b32 v[1:2], v1 offset1:8
+; CI-NEXT: ds_read2_b32 v[1:2], v0 offset1:8
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; GFX9-LABEL: simple_read2_f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v2
-; GFX9-NEXT: ds_read2_b32 v[0:1], v0 offset1:8
+; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:8
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; CI-LABEL: simple_read2_f32_max_offset:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT: v_add_i32_e32 v1, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: ds_read2_b32 v[1:2], v1 offset1:255
+; CI-NEXT: ds_read2_b32 v[1:2], v0 offset1:255
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; GFX9-LABEL: simple_read2_f32_max_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v2
-; GFX9-NEXT: ds_read2_b32 v[0:1], v0 offset1:255
+; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:255
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; CI-LABEL: simple_read2_f32_too_far:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT: v_add_i32_e32 v1, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: ds_read_b32 v2, v1
-; CI-NEXT: ds_read_b32 v1, v1 offset:1028
+; CI-NEXT: ds_read_b32 v1, v0
+; CI-NEXT: ds_read_b32 v2, v0 offset:1028
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: v_add_f32_e32 v2, v2, v1
+; CI-NEXT: v_add_f32_e32 v2, v1, v2
; CI-NEXT: v_mov_b32_e32 v1, 0
; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; CI-NEXT: s_endpgm
; GFX9-LABEL: simple_read2_f32_too_far:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT: v_add_u32_e32 v1, lds@abs32@lo, v0
-; GFX9-NEXT: ds_read_b32 v2, v1
-; GFX9-NEXT: ds_read_b32 v1, v1 offset:1028
+; GFX9-NEXT: ds_read_b32 v1, v0
+; GFX9-NEXT: ds_read_b32 v2, v0 offset:1028
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_add_f32_e32 v1, v2, v1
+; GFX9-NEXT: v_add_f32_e32 v1, v1, v2
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: s_endpgm
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
; CI-LABEL: simple_read2_f32_x2:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT: v_add_i32_e32 v3, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: ds_read2_b32 v[1:2], v3 offset1:8
+; CI-NEXT: ds_read2_b32 v[1:2], v0 offset1:8
+; CI-NEXT: ds_read2_b32 v[3:4], v0 offset0:11 offset1:27
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: v_add_f32_e32 v4, v1, v2
-; CI-NEXT: ds_read2_b32 v[1:2], v3 offset0:11 offset1:27
-; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_add_f32_e32 v1, v1, v2
-; CI-NEXT: v_add_f32_e32 v2, v4, v1
+; CI-NEXT: v_add_f32_e32 v2, v3, v4
+; CI-NEXT: v_add_f32_e32 v2, v1, v2
; CI-NEXT: v_mov_b32_e32 v1, 0
; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; CI-NEXT: s_endpgm
; GFX9-LABEL: simple_read2_f32_x2:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v0
-; GFX9-NEXT: v_add_u32_e32 v2, lds@abs32@lo, v4
-; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:8
-; GFX9-NEXT: ds_read2_b32 v[2:3], v2 offset0:11 offset1:27
+; GFX9-NEXT: ds_read2_b32 v[0:1], v4 offset1:8
+; GFX9-NEXT: ds_read2_b32 v[2:3], v4 offset0:11 offset1:27
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; CI-LABEL: simple_read2_f32_x2_barrier:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT: v_add_i32_e32 v3, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: ds_read2_b32 v[1:2], v3 offset1:8
+; CI-NEXT: ds_read2_b32 v[1:2], v0 offset1:8
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_barrier
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
-; CI-NEXT: v_add_f32_e32 v4, v1, v2
-; CI-NEXT: ds_read2_b32 v[1:2], v3 offset0:11 offset1:27
+; CI-NEXT: v_add_f32_e32 v3, v1, v2
+; CI-NEXT: ds_read2_b32 v[1:2], v0 offset0:11 offset1:27
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_add_f32_e32 v1, v1, v2
-; CI-NEXT: v_add_f32_e32 v2, v4, v1
+; CI-NEXT: v_add_f32_e32 v2, v3, v1
; CI-NEXT: v_mov_b32_e32 v1, 0
; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; CI-NEXT: s_endpgm
; GFX9-LABEL: simple_read2_f32_x2_barrier:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v0
-; GFX9-NEXT: v_add_u32_e32 v2, lds@abs32@lo, v4
-; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:8
+; GFX9-NEXT: ds_read2_b32 v[0:1], v4 offset1:8
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_barrier
-; GFX9-NEXT: ds_read2_b32 v[2:3], v2 offset0:11 offset1:27
+; GFX9-NEXT: ds_read2_b32 v[2:3], v4 offset0:11 offset1:27
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; CI-LABEL: simple_read2_f32_x2_nonzero_base:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT: v_add_i32_e32 v3, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: ds_read2_b32 v[1:2], v3 offset0:2 offset1:8
+; CI-NEXT: ds_read2_b32 v[1:2], v0 offset0:2 offset1:8
+; CI-NEXT: ds_read2_b32 v[3:4], v0 offset0:11 offset1:27
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: v_add_f32_e32 v4, v1, v2
-; CI-NEXT: ds_read2_b32 v[1:2], v3 offset0:11 offset1:27
-; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_add_f32_e32 v1, v1, v2
-; CI-NEXT: v_add_f32_e32 v2, v4, v1
+; CI-NEXT: v_add_f32_e32 v2, v3, v4
+; CI-NEXT: v_add_f32_e32 v2, v1, v2
; CI-NEXT: v_mov_b32_e32 v1, 0
; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:8
; CI-NEXT: s_endpgm
; GFX9-LABEL: simple_read2_f32_x2_nonzero_base:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v0
-; GFX9-NEXT: v_add_u32_e32 v2, lds@abs32@lo, v4
-; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset0:2 offset1:8
-; GFX9-NEXT: ds_read2_b32 v[2:3], v2 offset0:11 offset1:27
+; GFX9-NEXT: ds_read2_b32 v[0:1], v4 offset0:2 offset1:8
+; GFX9-NEXT: ds_read2_b32 v[2:3], v4 offset0:11 offset1:27
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; CI-LABEL: read2_ptr_is_subreg_f32:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT: v_add_i32_e32 v1, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: ds_read2_b32 v[1:2], v1 offset1:8
+; CI-NEXT: ds_read2_b32 v[1:2], v0 offset1:8
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; GFX9-LABEL: read2_ptr_is_subreg_f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v2
-; GFX9-NEXT: ds_read2_b32 v[0:1], v0 offset1:8
+; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:8
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; CI-LABEL: simple_read2_f32_volatile_0:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT: v_add_i32_e32 v1, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: ds_read_b32 v2, v1
-; CI-NEXT: ds_read_b32 v1, v1 offset:32
+; CI-NEXT: ds_read_b32 v1, v0
+; CI-NEXT: ds_read_b32 v2, v0 offset:32
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: v_add_f32_e32 v2, v2, v1
+; CI-NEXT: v_add_f32_e32 v2, v1, v2
; CI-NEXT: v_mov_b32_e32 v1, 0
; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; CI-NEXT: s_endpgm
; GFX9-LABEL: simple_read2_f32_volatile_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT: v_add_u32_e32 v1, lds@abs32@lo, v0
-; GFX9-NEXT: ds_read_b32 v2, v1
-; GFX9-NEXT: ds_read_b32 v1, v1 offset:32
+; GFX9-NEXT: ds_read_b32 v1, v0
+; GFX9-NEXT: ds_read_b32 v2, v0 offset:32
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_add_f32_e32 v1, v2, v1
+; GFX9-NEXT: v_add_f32_e32 v1, v1, v2
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: s_endpgm
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
; CI-LABEL: simple_read2_f32_volatile_1:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT: v_add_i32_e32 v1, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: ds_read_b32 v2, v1
-; CI-NEXT: ds_read_b32 v1, v1 offset:32
+; CI-NEXT: ds_read_b32 v1, v0
+; CI-NEXT: ds_read_b32 v2, v0 offset:32
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: v_add_f32_e32 v2, v2, v1
+; CI-NEXT: v_add_f32_e32 v2, v1, v2
; CI-NEXT: v_mov_b32_e32 v1, 0
; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; CI-NEXT: s_endpgm
; GFX9-LABEL: simple_read2_f32_volatile_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT: v_add_u32_e32 v1, lds@abs32@lo, v0
-; GFX9-NEXT: ds_read_b32 v2, v1
-; GFX9-NEXT: ds_read_b32 v1, v1 offset:32
+; GFX9-NEXT: ds_read_b32 v1, v0
+; GFX9-NEXT: ds_read_b32 v2, v0 offset:32
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_add_f32_e32 v1, v2, v1
+; GFX9-NEXT: v_add_f32_e32 v1, v1, v2
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: s_endpgm
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
; CI-LABEL: simple_read2_f64:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v4, 3, v0
-; CI-NEXT: v_add_i32_e32 v0, vcc, lds.f64@abs32@lo, v4
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: ds_read2_b64 v[0:3], v0 offset1:8
+; CI-NEXT: ds_read2_b64 v[0:3], v4 offset1:8
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; GFX9-LABEL: simple_read2_f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0
-; GFX9-NEXT: v_add_u32_e32 v0, lds.f64@abs32@lo, v4
-; GFX9-NEXT: ds_read2_b64 v[0:3], v0 offset1:8
+; GFX9-NEXT: ds_read2_b64 v[0:3], v4 offset1:8
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; CI-LABEL: simple_read2_f64_max_offset:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v4, 3, v0
-; CI-NEXT: v_add_i32_e32 v0, vcc, lds.f64@abs32@lo, v4
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: ds_read2_b64 v[0:3], v0 offset1:255
+; CI-NEXT: ds_read2_b64 v[0:3], v4 offset1:255
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; GFX9-LABEL: simple_read2_f64_max_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0
-; GFX9-NEXT: v_add_u32_e32 v0, lds.f64@abs32@lo, v4
-; GFX9-NEXT: ds_read2_b64 v[0:3], v0 offset1:255
+; GFX9-NEXT: ds_read2_b64 v[0:3], v4 offset1:255
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; CI-LABEL: simple_read2_f64_too_far:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
-; CI-NEXT: v_add_i32_e32 v3, vcc, lds.f64@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: ds_read_b64 v[1:2], v3
-; CI-NEXT: ds_read_b64 v[3:4], v3 offset:2056
+; CI-NEXT: ds_read_b64 v[1:2], v0
+; CI-NEXT: ds_read_b64 v[3:4], v0 offset:2056
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, 0
; GFX9-LABEL: simple_read2_f64_too_far:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0
-; GFX9-NEXT: v_add_u32_e32 v2, lds.f64@abs32@lo, v4
-; GFX9-NEXT: ds_read_b64 v[0:1], v2
-; GFX9-NEXT: ds_read_b64 v[2:3], v2 offset:2056
+; GFX9-NEXT: ds_read_b64 v[0:1], v4
+; GFX9-NEXT: ds_read_b64 v[2:3], v4 offset:2056
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
define amdgpu_kernel void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
; CI-LABEL: load_constant_adjacent_offsets:
; CI: ; %bb.0:
-; CI-NEXT: v_mov_b32_e32 v0, foo@abs32@lo
+; CI-NEXT: v_mov_b32_e32 v0, 0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
;
; GFX9-LABEL: load_constant_adjacent_offsets:
; GFX9: ; %bb.0:
-; GFX9-NEXT: v_mov_b32_e32 v0, foo@abs32@lo
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
define amdgpu_kernel void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
; CI-LABEL: load_constant_disjoint_offsets:
; CI: ; %bb.0:
-; CI-NEXT: v_mov_b32_e32 v0, foo@abs32@lo
+; CI-NEXT: v_mov_b32_e32 v0, 0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: ds_read2_b32 v[0:1], v0 offset1:2
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
;
; GFX9-LABEL: load_constant_disjoint_offsets:
; GFX9: ; %bb.0:
-; GFX9-NEXT: v_mov_b32_e32 v0, foo@abs32@lo
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: ds_read2_b32 v[0:1], v0 offset1:2
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
define amdgpu_kernel void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
; CI-LABEL: load_misaligned64_constant_offsets:
; CI: ; %bb.0:
-; CI-NEXT: v_mov_b32_e32 v2, bar@abs32@lo
+; CI-NEXT: v_mov_b32_e32 v2, 0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: ds_read2_b32 v[0:1], v2 offset1:1
; CI-NEXT: ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
;
; GFX9-ALIGNED-LABEL: load_misaligned64_constant_offsets:
; GFX9-ALIGNED: ; %bb.0:
-; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v2, bar@abs32@lo
+; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v2, 0
; GFX9-ALIGNED-NEXT: ds_read2_b32 v[0:1], v2 offset1:1
; GFX9-ALIGNED-NEXT: ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
;
; GFX9-UNALIGNED-LABEL: load_misaligned64_constant_offsets:
; GFX9-UNALIGNED: ; %bb.0:
-; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, bar@abs32@lo
+; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0
; GFX9-UNALIGNED-NEXT: ds_read_b128 v[0:3], v0
; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0)
define amdgpu_kernel void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
; CI-LABEL: load_misaligned64_constant_large_offsets:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s4, bar.large@abs32@lo
-; CI-NEXT: s_add_i32 s5, s4, 0x4000
-; CI-NEXT: s_addk_i32 s4, 0x7ff8
-; CI-NEXT: v_mov_b32_e32 v0, s5
-; CI-NEXT: v_mov_b32_e32 v2, s4
+; CI-NEXT: v_mov_b32_e32 v0, 0x4000
+; CI-NEXT: v_mov_b32_e32 v2, 0x7ff8
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
; CI-NEXT: ds_read2_b32 v[2:3], v2 offset1:1
;
; GFX9-LABEL: load_misaligned64_constant_large_offsets:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s2, bar.large@abs32@lo
-; GFX9-NEXT: s_add_i32 s3, s2, 0x4000
-; GFX9-NEXT: s_addk_i32 s2, 0x7ff8
-; GFX9-NEXT: v_mov_b32_e32 v0, s3
-; GFX9-NEXT: v_mov_b32_e32 v2, s2
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x4000
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x7ff8
; GFX9-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
; GFX9-NEXT: ds_read2_b32 v[2:3], v2 offset1:1
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
define amdgpu_kernel void @ds_read_call_read(i32 addrspace(1)* %out, i32 addrspace(3)* %arg) {
; CI-LABEL: ds_read_call_read:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_getpc_b64 s[40:41]
+; CI-NEXT: s_mov_b32 s40, s0
+; CI-NEXT: s_load_dwordx4 s[40:43], s[40:41], 0x0
; CI-NEXT: s_load_dwordx2 s[36:37], s[0:1], 0x9
; CI-NEXT: s_load_dword s0, s[0:1], 0xb
-; CI-NEXT: s_mov_b32 s42, -1
-; CI-NEXT: s_mov_b32 s43, 0xe8f000
-; CI-NEXT: s_add_u32 s40, s40, s3
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT: s_addc_u32 s41, s41, 0
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_add_u32 s40, s40, s3
+; CI-NEXT: s_addc_u32 s41, s41, 0
; CI-NEXT: v_add_i32_e32 v40, vcc, s0, v0
; CI-NEXT: s_getpc_b64 s[0:1]
; CI-NEXT: s_add_u32 s0, s0, void_func_void@gotpcrel32@lo+4
; CI-NEXT: s_addc_u32 s1, s1, void_func_void@gotpcrel32@hi+12
-; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; CI-NEXT: ds_read_b32 v41, v40
; CI-NEXT: s_mov_b64 s[0:1], s[40:41]
; CI-NEXT: s_mov_b64 s[2:3], s[42:43]
-; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_mov_b32 s39, 0xf000
; CI-NEXT: s_mov_b32 s38, -1
; CI-NEXT: s_waitcnt lgkmcnt(0)
;
; GFX9-LABEL: ds_read_call_read:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_getpc_b64 s[36:37]
+; GFX9-NEXT: s_mov_b32 s36, s0
+; GFX9-NEXT: s_load_dwordx4 s[36:39], s[36:37], 0x0
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[0:1], 0x24
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_add_u32 s36, s36, s3
; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, void_func_void@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, void_func_void@gotpcrel32@hi+12
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_lshl_add_u32 v40, v0, 2, s2
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX9-NEXT: ds_read_b32 v41, v40
; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: ds_read_b32 v0, v40 offset:4
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-ALIGNED %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-ALIGNED %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
@lds = addrspace(3) global [512 x float] undef, align 4
@lds.f64 = addrspace(3) global [512 x double] undef, align 8
; CI-NEXT: v_mov_b32_e32 v1, 0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dword v1, v[0:1], s[0:3], 0 addr64
-; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: ds_write2_b32 v0, v1, v1 offset1:8
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write2_b32 v0, v1, v1 offset1:8
; GFX9-NEXT: s_endpgm
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
; CI-NEXT: buffer_load_dword v1, v[0:1], s[0:3], 0 addr64 offset:4
-; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: ds_write2_b32 v0, v2, v1 offset1:8
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
; GFX9-NEXT: global_load_dword v2, v0, s[0:1] offset:4
-; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset1:8
; GFX9-NEXT: s_endpgm
; CI-NEXT: s_mov_b64 s[6:7], s[2:3]
; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
; CI-NEXT: buffer_load_dword v1, v[0:1], s[4:7], 0 addr64
-; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_waitcnt vmcnt(1)
; CI-NEXT: ds_write_b32 v0, v2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
; GFX9-NEXT: global_load_dword v2, v0, s[2:3]
-; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v0
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: ds_write_b32 v0, v1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_mov_b64 s[6:7], s[2:3]
; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
; CI-NEXT: buffer_load_dword v1, v[0:1], s[4:7], 0 addr64
-; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_waitcnt vmcnt(1)
; CI-NEXT: ds_write_b32 v0, v2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
; GFX9-NEXT: global_load_dword v2, v0, s[2:3]
-; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v0
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: ds_write_b32 v0, v1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[0:3], 0 addr64
; CI-NEXT: buffer_load_dwordx2 v[1:2], v[1:2], s[0:3], 0 addr64 offset:8
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: ds_write2_b32 v0, v3, v2 offset1:8
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 3, v0
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[1:2], v3, s[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[2:3], v3, s[0:1] offset:8
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v2, lds@abs32@lo
-; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
; GFX9-NEXT: ds_write2_b32 v0, v1, v3 offset1:8
; GFX9-NEXT: s_endpgm
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dwordx2 v[1:2], v[1:2], s[0:3], 0 addr64
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: ds_write2_b32 v0, v1, v2 offset1:8
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v0
-; GFX9-NEXT: v_mov_b32_e32 v3, lds@abs32@lo
-; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v3
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[1:2], v1, s[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dwordx4 v[1:4], v[1:2], s[0:3], 0 addr64
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: ds_write2_b32 v0, v1, v4 offset1:8
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 4, v0
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx4 v[1:4], v1, s[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v2, lds@abs32@lo
-; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
; GFX9-NEXT: ds_write2_b32 v0, v1, v4 offset1:8
; GFX9-NEXT: s_endpgm
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
; CI-NEXT: buffer_load_dword v1, v[0:1], s[0:3], 0 addr64 offset:4
-; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: ds_write2_b32 v0, v2, v1 offset1:255
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
; GFX9-NEXT: global_load_dword v2, v0, s[0:1] offset:4
-; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset1:255
; GFX9-NEXT: s_endpgm
; CI-NEXT: s_mov_b64 s[6:7], s[2:3]
; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
; CI-NEXT: buffer_load_dword v1, v[0:1], s[4:7], 0 addr64
-; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_waitcnt vmcnt(1)
; CI-NEXT: ds_write_b32 v0, v2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
; GFX9-NEXT: global_load_dword v2, v0, s[2:3]
-; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v0
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: ds_write_b32 v0, v1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_mov_b64 s[6:7], s[2:3]
; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
; CI-NEXT: buffer_load_dword v1, v[0:1], s[4:7], 0 addr64
-; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: ds_write2_b32 v0, v2, v1 offset1:8
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
; GFX9-NEXT: global_load_dword v2, v0, s[2:3]
-; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset1:8
; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset0:11 offset1:27
; CI-NEXT: s_mov_b64 s[6:7], s[2:3]
; CI-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
; CI-NEXT: buffer_load_dword v1, v[0:1], s[4:7], 0 addr64
-; CI-NEXT: v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: ds_write2_b32 v0, v2, v1 offset0:3 offset1:8
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
; GFX9-NEXT: global_load_dword v2, v0, s[2:3]
-; GFX9-NEXT: v_add_u32_e32 v0, lds@abs32@lo, v0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset0:3 offset1:8
; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset0:11 offset1:27
; CI-NEXT: v_mov_b32_e32 v1, 0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dwordx2 v[1:2], v[0:1], s[0:3], 0 addr64
-; CI-NEXT: v_add_i32_e32 v0, vcc, lds.f64@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: ds_write2_b64 v0, v[1:2], v[1:2] offset1:8
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
-; GFX9-NEXT: v_add_u32_e32 v2, lds.f64@abs32@lo, v2
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write2_b64 v2, v[0:1], v[0:1] offset1:8
; GFX9-NEXT: s_endpgm
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
; CI-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64 offset:8
-; CI-NEXT: v_add_i32_e32 v0, vcc, lds.f64@abs32@lo, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:8
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1]
; GFX9-NEXT: global_load_dwordx2 v[2:3], v4, s[0:1] offset:8
-; GFX9-NEXT: v_add_u32_e32 v4, lds.f64@abs32@lo, v4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write2_b64 v4, v[0:1], v[2:3] offset1:8
; GFX9-NEXT: s_endpgm
; CI-LABEL: store_constant_adjacent_offsets:
; CI: ; %bb.0:
; CI-NEXT: s_movk_i32 s0, 0x7b
-; CI-NEXT: v_mov_b32_e32 v0, foo@abs32@lo
+; CI-NEXT: v_mov_b32_e32 v0, 0
; CI-NEXT: v_mov_b32_e32 v1, s0
; CI-NEXT: v_mov_b32_e32 v2, s0
; CI-NEXT: s_mov_b32 m0, -1
; GFX9-LABEL: store_constant_adjacent_offsets:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_movk_i32 s0, 0x7b
-; GFX9-NEXT: v_mov_b32_e32 v0, foo@abs32@lo
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_mov_b32_e32 v2, s0
; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset1:1
define amdgpu_kernel void @store_constant_disjoint_offsets() {
; CI-LABEL: store_constant_disjoint_offsets:
; CI: ; %bb.0:
-; CI-NEXT: v_mov_b32_e32 v0, foo@abs32@lo
-; CI-NEXT: v_mov_b32_e32 v1, 0x7b
+; CI-NEXT: v_mov_b32_e32 v0, 0x7b
+; CI-NEXT: v_mov_b32_e32 v1, 0
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: ds_write2_b32 v0, v1, v1 offset1:2
+; CI-NEXT: ds_write2_b32 v1, v0, v0 offset1:2
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: store_constant_disjoint_offsets:
; GFX9: ; %bb.0:
-; GFX9-NEXT: v_mov_b32_e32 v0, foo@abs32@lo
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX9-NEXT: ds_write2_b32 v0, v1, v1 offset1:2
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-NEXT: ds_write2_b32 v1, v0, v0 offset1:2
; GFX9-NEXT: s_endpgm
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
define amdgpu_kernel void @store_misaligned64_constant_offsets() {
; CI-LABEL: store_misaligned64_constant_offsets:
; CI: ; %bb.0:
-; CI-NEXT: v_mov_b32_e32 v0, bar@abs32@lo
+; CI-NEXT: v_mov_b32_e32 v0, 0
; CI-NEXT: v_mov_b32_e32 v1, 0x7b
-; CI-NEXT: v_mov_b32_e32 v2, 0
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: ds_write2_b32 v0, v1, v2 offset1:1
-; CI-NEXT: ds_write2_b32 v0, v1, v2 offset0:2 offset1:3
+; CI-NEXT: ds_write2_b32 v0, v1, v0 offset1:1
+; CI-NEXT: ds_write2_b32 v0, v1, v0 offset0:2 offset1:3
; CI-NEXT: s_endpgm
;
; GFX9-ALIGNED-LABEL: store_misaligned64_constant_offsets:
; GFX9-ALIGNED: ; %bb.0:
-; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v0, bar@abs32@lo
+; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v0, 0
; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-ALIGNED-NEXT: ds_write2_b32 v0, v1, v2 offset1:1
-; GFX9-ALIGNED-NEXT: ds_write2_b32 v0, v1, v2 offset0:2 offset1:3
+; GFX9-ALIGNED-NEXT: ds_write2_b32 v0, v1, v0 offset1:1
+; GFX9-ALIGNED-NEXT: ds_write2_b32 v0, v1, v0 offset0:2 offset1:3
; GFX9-ALIGNED-NEXT: s_endpgm
;
; GFX9-UNALIGNED-LABEL: store_misaligned64_constant_offsets:
; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v1, 0
; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v2, v0
; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v3, v1
-; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v4, bar@abs32@lo
-; GFX9-UNALIGNED-NEXT: ds_write_b128 v4, v[0:3]
+; GFX9-UNALIGNED-NEXT: ds_write_b128 v1, v[0:3]
; GFX9-UNALIGNED-NEXT: s_endpgm
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
define amdgpu_kernel void @store_misaligned64_constant_large_offsets() {
; CI-LABEL: store_misaligned64_constant_large_offsets:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s0, bar.large@abs32@lo
-; CI-NEXT: s_add_i32 s1, s0, 0x4000
-; CI-NEXT: v_mov_b32_e32 v0, s1
+; CI-NEXT: v_mov_b32_e32 v0, 0x4000
; CI-NEXT: v_mov_b32_e32 v1, 0x7b
; CI-NEXT: v_mov_b32_e32 v2, 0
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: s_addk_i32 s0, 0x7ff8
; CI-NEXT: ds_write2_b32 v0, v1, v2 offset1:1
-; CI-NEXT: v_mov_b32_e32 v0, s0
+; CI-NEXT: v_mov_b32_e32 v0, 0x7ff8
; CI-NEXT: ds_write2_b32 v0, v1, v2 offset1:1
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: store_misaligned64_constant_large_offsets:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s0, bar.large@abs32@lo
-; GFX9-NEXT: s_add_i32 s1, s0, 0x4000
-; GFX9-NEXT: v_mov_b32_e32 v0, s1
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x4000
; GFX9-NEXT: v_mov_b32_e32 v1, 0x7b
; GFX9-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-NEXT: s_addk_i32 s0, 0x7ff8
; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset1:1
-; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v0, 0x7ff8
; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset1:1
; GFX9-NEXT: s_endpgm
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4