From: Matt Arsenault Date: Wed, 1 Apr 2020 20:46:10 +0000 (-0400) Subject: AMDGPU: Use 128-bit DS operations by default X-Git-Tag: llvmorg-12-init~10266 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f68cc2a7ed766965028b8b0f0d9300a0460c3cf1;p=platform%2Fupstream%2Fllvm.git AMDGPU: Use 128-bit DS operations by default --- diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 5d58b82..df4c630 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -78,7 +78,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT, // unset everything else if it is disabled // Assuming ECC is enabled is the conservative default. - SmallString<256> FullFS("+promote-alloca,+load-store-opt,+sram-ecc,+xnack,"); + SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,+sram-ecc,+xnack,"); if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,"; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir index 7e0607d..2fb1223 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -mattr=-enable-ds128 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -mattr=+enable-ds128 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI-DS128 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s @@ -1759,24 +1759,12 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) ; VI-LABEL: name: test_load_local_s96_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, align 8, addrspace 3) - ; VI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0 - ; VI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) + ; VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 8, addrspace 3) + ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) ; GFX9-LABEL: name: test_load_local_s96_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, align 8, addrspace 3) - ; GFX9: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) + ; GFX9: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) %0:_(p3) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load 12, align 8, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -1814,24 +1802,12 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) ; VI-LABEL: name: test_load_local_s96_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, addrspace 3) - ; VI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0 - ; VI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) + ; VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) ; GFX9-LABEL: name: test_load_local_s96_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, addrspace 3) - ; GFX9: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) + ; GFX9: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) %0:_(p3) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -2969,94 +2945,99 @@ body: | ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1, addrspace 3) - ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3) + ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C9]](s32) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3) + ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3) + ; VI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3) + ; VI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C12]](s32) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3) + ; VI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C13]](s32) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3) + ; VI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C14]](s32) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3) + ; VI: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C15]] ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]] - ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16) + ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C15]] + ; VI: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C16]](s16) ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] + ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C15]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]] - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16) + ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C15]] + ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C16]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] + ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C15]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16) + ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C15]] + ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C16]](s16) ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] + ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C15]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) + ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C15]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C16]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) - ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) - ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3) ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) - ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] + ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C15]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) - ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] + ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C15]] + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C16]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) - ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] + ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C15]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C15]] + ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C16]](s16) + ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) - ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] + ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C15]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) - ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] + ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C15]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C16]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) - ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] + ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C15]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) - ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] - ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C15]] + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C16]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C17]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C17]](s32) + ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C17]](s32) ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C17]](s32) ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; VI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) + ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; GFX9-LABEL: name: test_load_local_s128_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -3081,94 +3062,99 @@ body: | ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1, addrspace 3) - ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C9]](s32) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C12]](s32) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C13]](s32) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C14]](s32) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C15]] ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]] - ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16) + ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C15]] + ; GFX9: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C16]](s16) ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] + ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C15]] ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16) + ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C15]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C16]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] + ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C15]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16) + ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C15]] + ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C16]](s16) ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] + ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C15]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) + ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C15]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C16]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) - ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) - ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3) ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) - ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] + ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C15]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) - ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] + ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C15]] + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C16]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) - ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] + ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C15]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C15]] + ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C16]](s16) + ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) - ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] + ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C15]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) - ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] + ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C15]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C16]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) - ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] + ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C15]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) - ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] - ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C15]] + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C16]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C17]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C17]](s32) + ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C17]](s32) ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C17]](s32) ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; GFX9: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) + ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) %0:_(p3) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -3202,20 +3188,12 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) ; VI-LABEL: name: test_load_local_s128_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3) - ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) ; GFX9-LABEL: name: test_load_local_s128_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3) - ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; GFX9: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) %0:_(p3) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load 16, align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -3249,20 +3227,12 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) ; VI-LABEL: name: test_load_local_s128_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3) - ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) ; GFX9-LABEL: name: test_load_local_s128_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3) - ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; GFX9: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) %0:_(p3) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -3440,45 +3410,46 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2, addrspace 3) - ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2, addrspace 3) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2, addrspace 3) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2, addrspace 3) + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2, addrspace 3) + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2, addrspace 3) - ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2, addrspace 3) - ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2, addrspace 3) - ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2, addrspace 3) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32) ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) - ; VI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) + ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; GFX9-LABEL: name: test_load_local_s128_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) @@ -3491,45 +3462,46 @@ body: | ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2, addrspace 3) - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2, addrspace 3) + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2, addrspace 3) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2, addrspace 3) + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2, addrspace 3) + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32) ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2, addrspace 3) - ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2, addrspace 3) - ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2, addrspace 3) - ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2, addrspace 3) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32) ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) - ; GFX9: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) + ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) %0:_(p3) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load 16, align 2, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -3954,94 +3926,99 @@ body: | ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1, addrspace 3) - ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3) + ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C9]](s32) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3) + ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3) + ; VI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3) + ; VI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C12]](s32) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3) + ; VI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C13]](s32) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3) + ; VI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C14]](s32) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3) + ; VI: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C15]] ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]] - ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16) + ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C15]] + ; VI: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C16]](s16) ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] + ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C15]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]] - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16) + ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C15]] + ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C16]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] + ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C15]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16) + ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C15]] + ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C16]](s16) ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] + ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C15]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) - ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) - ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3) + ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C15]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C16]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) - ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] + ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C15]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) - ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] + ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C15]] + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C16]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) - ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] + ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C15]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C15]] + ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C16]](s16) + ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) - ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] + ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C15]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) - ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] + ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C15]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C16]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) - ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] + ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C15]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) - ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] - ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C15]] + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C16]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C17]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C17]](s32) + ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C17]](s32) ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C17]](s32) ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; VI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) + ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; GFX9-LABEL: name: test_load_local_s128_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -4066,94 +4043,99 @@ body: | ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1, addrspace 3) - ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C9]](s32) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C12]](s32) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C13]](s32) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C14]](s32) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C15]] ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]] - ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16) + ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C15]] + ; GFX9: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C16]](s16) ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] + ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C15]] ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16) + ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C15]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C16]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) - ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] + ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C15]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16) + ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C15]] + ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C16]](s16) ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) - ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] + ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C15]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) + ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C15]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C16]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) - ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) - ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3) ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) - ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] + ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C15]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) - ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] + ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C15]] + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C16]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) - ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] + ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C15]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C15]] + ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C16]](s16) + ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) - ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] + ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C15]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) - ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] + ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C15]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C16]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) - ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] + ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C15]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) - ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] - ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C15]] + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C16]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C17]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C17]](s32) + ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C17]](s32) ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C17]](s32) ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; GFX9: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) + ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) %0:_(p3) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -5846,33 +5828,38 @@ body: | ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1, addrspace 3) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C9]](s32) ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) + ; VI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C11]](s32) ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) + ; VI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C12]](s32) ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32) + ; VI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C13]](s32) ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3) - ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) + ; VI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C14]](s32) ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) @@ -5881,10 +5868,9 @@ body: | ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<8 x s32>) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<8 x s8>), [[TRUNC1]](<8 x s8>) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[TRUNC]](<16 x s8>) ; GFX9-LABEL: name: test_load_local_v16s8_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -5909,6 +5895,30 @@ body: | ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C9]](s32) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C12]](s32) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C13]](s32) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C14]](s32) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -5921,25 +5931,6 @@ body: | ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>) - ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3) - ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) @@ -5952,10 +5943,9 @@ body: | ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) - ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<8 x s16>) - ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<8 x s8>), [[TRUNC1]](<8 x s8>) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS2]](<16 x s8>) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<16 x s16>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[TRUNC]](<16 x s8>) %0:_(p3) = COPY $vgpr0 %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 1, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -8187,24 +8177,12 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; VI-LABEL: name: test_load_local_v3s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, addrspace 3) - ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 - ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; GFX9-LABEL: name: test_load_local_v3s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, addrspace 3) - ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -8238,20 +8216,12 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; VI-LABEL: name: test_load_local_v4s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 16, addrspace 3) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, addrspace 3) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-LABEL: name: test_load_local_v4s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 16, addrspace 3) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 16, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -8285,20 +8255,12 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; VI-LABEL: name: test_load_local_v4s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-LABEL: name: test_load_local_v4s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -8332,20 +8294,12 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; VI-LABEL: name: test_load_local_v4s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-LABEL: name: test_load_local_v4s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -9048,7 +9002,6 @@ body: | ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3) @@ -9074,7 +9027,8 @@ body: | ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] - ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) + ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3) @@ -9098,9 +9052,8 @@ body: | ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C5]](s32) ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; GFX9-LABEL: name: test_load_local_v4s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -9157,7 +9110,6 @@ body: | ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3) @@ -9183,7 +9135,8 @@ body: | ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] - ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) + ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3) @@ -9207,9 +9160,8 @@ body: | ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C5]](s32) ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 1, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -9259,31 +9211,19 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) ; VI-LABEL: name: test_load_local_v8s32_align32 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 32, addrspace 3) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 16, addrspace 3) - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load 8, addrspace 3) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>) + ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 16, addrspace 3) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) ; GFX9-LABEL: name: test_load_local_v8s32_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 32, addrspace 3) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 16, addrspace 3) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load 8, addrspace 3) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 16, addrspace 3) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, align 32, addrspace 3) @@ -9349,20 +9289,12 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; VI-LABEL: name: test_load_local_v2s64_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; GFX9-LABEL: name: test_load_local_v2s64_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -10033,30 +9965,28 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>) ; VI-LABEL: name: test_load_local_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 32, addrspace 3) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 16, addrspace 3) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 16, addrspace 3) + ; VI: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0 + ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0 + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>) ; GFX9-LABEL: name: test_load_local_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 32, addrspace 3) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 16, addrspace 3) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 16, addrspace 3) + ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0 + ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0 + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 32, addrspace 3) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -10108,32 +10038,20 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) ; VI-LABEL: name: test_load_local_v4s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 32, addrspace 3) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 16, addrspace 3) - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p3) :: (load 8, addrspace 3) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[LOAD3]](s64) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load 16, addrspace 3) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) ; GFX9-LABEL: name: test_load_local_v4s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 32, addrspace 3) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 16, addrspace 3) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p3) :: (load 8, addrspace 3) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[LOAD3]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load 16, addrspace 3) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 32, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -10167,20 +10085,12 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) ; VI-LABEL: name: test_load_local_v2p1_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[LOAD]](p1), [[LOAD1]](p1) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) ; GFX9-LABEL: name: test_load_local_v2p1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[LOAD]](p1), [[LOAD1]](p1) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -12026,46 +11936,24 @@ body: | ; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) ; VI-LABEL: name: test_extload_local_v2s96_from_24_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, addrspace 3) - ; VI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF - ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[DEF]](s96) - ; VI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY1]], [[LOAD]](s64), 0 - ; VI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 4, addrspace 3) - ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4, addrspace 3) - ; VI: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD2]](s64), 0 - ; VI: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64 - ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96) - ; VI: [[COPY3:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96) - ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96) + ; VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p3) :: (load 12, align 4, addrspace 3) + ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[LOAD]](s96) + ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[LOAD1]](s96) + ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) ; GFX9-LABEL: name: test_extload_local_v2s96_from_24_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, addrspace 3) - ; GFX9: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF - ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[DEF]](s96) - ; GFX9: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY1]], [[LOAD]](s64), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 4, addrspace 3) - ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4, addrspace 3) - ; GFX9: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD2]](s64), 0 - ; GFX9: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64 - ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96) - ; GFX9: [[COPY3:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96) - ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p3) :: (load 12, align 4, addrspace 3) + ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[LOAD]](s96) + ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[LOAD1]](s96) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 4, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 @@ -12134,46 +12022,24 @@ body: | ; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) ; VI-LABEL: name: test_extload_local_v2s96_from_24_align16 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 16, addrspace 3) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 16, addrspace 3) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, align 8, addrspace 3) - ; VI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF - ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[DEF]](s96) - ; VI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY1]], [[LOAD]](s64), 0 - ; VI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 4, addrspace 3) - ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4, addrspace 3) - ; VI: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD2]](s64), 0 - ; VI: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64 - ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96) - ; VI: [[COPY3:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96) - ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96) + ; VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p3) :: (load 12, align 4, addrspace 3) + ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[LOAD]](s96) + ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[LOAD1]](s96) + ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) ; GFX9-LABEL: name: test_extload_local_v2s96_from_24_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 16, addrspace 3) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 16, addrspace 3) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, align 8, addrspace 3) - ; GFX9: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF - ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[DEF]](s96) - ; GFX9: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY1]], [[LOAD]](s64), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 4, addrspace 3) - ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4, addrspace 3) - ; GFX9: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD2]](s64), 0 - ; GFX9: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64 - ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96) - ; GFX9: [[COPY3:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96) - ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p3) :: (load 12, align 4, addrspace 3) + ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[LOAD]](s96) + ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[LOAD1]](s96) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 16, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/concat_vectors.ll b/llvm/test/CodeGen/AMDGPU/concat_vectors.ll index e50173b..4fe3008 100644 --- a/llvm/test/CodeGen/AMDGPU/concat_vectors.ll +++ b/llvm/test/CodeGen/AMDGPU/concat_vectors.ll @@ -309,8 +309,8 @@ define amdgpu_kernel void @concat_vector_crash2(<8 x i8> addrspace(1)* %out, i32 ; GCN-LABEL: {{^}}build_vector_splat_concat_v8i16: ; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} -; VI: ds_write_b64 -; VI: ds_write2_b64 +; VI: ds_write_b128 +; VI: ds_write_b128 define amdgpu_kernel void @build_vector_splat_concat_v8i16() { entry: store <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* undef, align 16 diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll b/llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll index ef4efc6..5e2fc93 100644 --- a/llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt,-enable-ds128 < %s | FileCheck -check-prefixes=CI,NODS128 %s @lds = addrspace(3) global [512 x float] undef, align 4 @lds.v2 = addrspace(3) global [512 x <2 x float>] undef, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll b/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll index 699f206..39afd17 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll @@ -49,8 +49,8 @@ define amdgpu_kernel void @private_access_f64_alloca(double addrspace(1)* noalia ; SI-PROMOTE: ds_write_b64 ; SI-PROMOTE: ds_read_b64 ; SI-PROMOTE: ds_read_b64 -; CI-PROMOTE: ds_write2_b64 -; CI-PROMOTE: ds_read2_b64 +; CI-PROMOTE: ds_write_b128 +; CI-PROMOTE: ds_read_b128 define amdgpu_kernel void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) #1 { %val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16 %array = alloca [4 x <2 x double>], align 16, addrspace(5) @@ -107,8 +107,8 @@ define amdgpu_kernel void @private_access_i64_alloca(i64 addrspace(1)* noalias % ; SI-PROMOTE: ds_write_b64 ; SI-PROMOTE: ds_read_b64 ; SI-PROMOTE: ds_read_b64 -; CI-PROMOTE: ds_write2_b64 -; CI-PROMOTE: ds_read2_b64 +; CI-PROMOTE: ds_write_b128 +; CI-PROMOTE: ds_read_b128 define amdgpu_kernel void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) #1 { %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16 %array = alloca [4 x <2 x i64>], align 16, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/insert-subvector-unused-scratch.ll b/llvm/test/CodeGen/AMDGPU/insert-subvector-unused-scratch.ll index 9a23df9..c4599ee 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-subvector-unused-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/insert-subvector-unused-scratch.ll @@ -18,9 +18,9 @@ define amdgpu_kernel void @store_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> % ; GCN-LABEL: store_v5i32: ; GCN: ds_read_b32 -; GCN: ds_read2_b64 +; GCN: ds_read_b128 ; GCN: ds_write_b32 -; GCN: ds_write2_b64 +; GCN: ds_write_b128 ; GCN: ScratchSize: 0 define amdgpu_kernel void @store_v5i32(<5 x i32> addrspace(3)* %out, <5 x i32> %a) nounwind { %val = load <5 x i32>, <5 x i32> addrspace(3)* %out @@ -28,5 +28,3 @@ define amdgpu_kernel void @store_v5i32(<5 x i32> addrspace(3)* %out, <5 x i32> % store <5 x i32> %val.1, <5 x i32> addrspace(3)* %out, align 16 ret void } - - diff --git a/llvm/test/CodeGen/AMDGPU/load-local-f32.ll b/llvm/test/CodeGen/AMDGPU/load-local-f32.ll index c33c000..a0559c1 100644 --- a/llvm/test/CodeGen/AMDGPU/load-local-f32.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local-f32.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s ; Testing for ds_read/write_128 diff --git a/llvm/test/CodeGen/AMDGPU/load-local-f64.ll b/llvm/test/CodeGen/AMDGPU/load-local-f64.ll index e313b38..7495860 100644 --- a/llvm/test/CodeGen/AMDGPU/load-local-f64.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local-f64.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s ; Testing for ds_read_b128 diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll index d8d7d98..357141b 100644 --- a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SICIVI,GFX89,FUNC %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX89,FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SICIVI,GFX89,FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX89,FUNC %s ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s ; Testing for ds_read/write_b128 diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i32.ll b/llvm/test/CodeGen/AMDGPU/load-local-i32.ll index 1088788..0063bb0 100644 --- a/llvm/test/CodeGen/AMDGPU/load-local-i32.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local-i32.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s -; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global,-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global,-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-flat-for-global,-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; Testing for ds_read/write_128 diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i64.ll b/llvm/test/CodeGen/AMDGPU/load-local-i64.ll index d91c3b5..8a07640 100644 --- a/llvm/test/CodeGen/AMDGPU/load-local-i64.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local-i64.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s ; Testing for ds_read/write_b128 diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i8.ll b/llvm/test/CodeGen/AMDGPU/load-local-i8.ll index 02477ef..8137ded 100644 --- a/llvm/test/CodeGen/AMDGPU/load-local-i8.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local-i8.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,VI,SICIVI,FUNC %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,VI,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,FUNC %s ; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s ; Testing for ds_read/write_b128 diff --git a/llvm/test/CodeGen/AMDGPU/local-64.ll b/llvm/test/CodeGen/AMDGPU/local-64.ll index f0dca07..3e85dd5 100644 --- a/llvm/test/CodeGen/AMDGPU/local-64.ll +++ b/llvm/test/CodeGen/AMDGPU/local-64.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SICIVI %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,SICIVI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SICIVI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,SICIVI,CIPLUS %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SICIVI,CIPLUS %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,CIPLUS %s ; GCN-LABEL: {{^}}local_i32_load ; SICIVI: s_mov_b32 m0 @@ -165,7 +165,8 @@ define amdgpu_kernel void @local_f64_store_0_offset(double addrspace(3)* %out) n ; GFX9-NOT: m0 ; GCN-NOT: add -; GCN: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15 +; SI: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15 +; CIPLUS: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:112 ; GCN: s_endpgm define amdgpu_kernel void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind { %gep = getelementptr <2 x i64>, <2 x i64> addrspace(3)* %out, i32 7 @@ -178,7 +179,10 @@ define amdgpu_kernel void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounw ; GFX9-NOT: m0 ; GCN-NOT: add -; GCN: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1 + +; SI: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1{{$}} +; CIPLUS: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]$}} + ; GCN: s_endpgm define amdgpu_kernel void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind { store <2 x i64> , <2 x i64> addrspace(3)* %out, align 16 @@ -190,8 +194,12 @@ define amdgpu_kernel void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %o ; GFX9-NOT: m0 ; GCN-NOT: add -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29 +; SI-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31 +; SI-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29 + +; CIPLUS-DAG: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:224{{$}} +; CIPLUS-DAG: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:240{{$}} + ; GCN: s_endpgm define amdgpu_kernel void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind { %gep = getelementptr <4 x i64>, <4 x i64> addrspace(3)* %out, i32 7 @@ -204,8 +212,12 @@ define amdgpu_kernel void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounw ; GFX9-NOT: m0 ; GCN-NOT: add -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1 +; SI-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3 +; SI-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1 + +; CIPLUS-DAG: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]$}} +; CIPLUS-DAG: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16{{$}} + ; GCN: s_endpgm define amdgpu_kernel void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind { store <4 x i64> , <4 x i64> addrspace(3)* %out, align 16 diff --git a/llvm/test/CodeGen/AMDGPU/reorder-stores.ll b/llvm/test/CodeGen/AMDGPU/reorder-stores.ll index 260b32e..a379a64 100644 --- a/llvm/test/CodeGen/AMDGPU/reorder-stores.ll +++ b/llvm/test/CodeGen/AMDGPU/reorder-stores.ll @@ -1,12 +1,12 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn < %s | FileCheck -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,VI %s -; SI-LABEL: {{^}}no_reorder_v2f64_global_load_store: -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_store_dwordx4 -; SI: buffer_store_dwordx4 -; SI: s_endpgm +; GCN-LABEL: {{^}}no_reorder_v2f64_global_load_store: +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 +; GCN: s_endpgm define amdgpu_kernel void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind { %tmp1 = load <2 x double>, <2 x double> addrspace(1)* %x, align 16 %tmp4 = load <2 x double>, <2 x double> addrspace(1)* %y, align 16 @@ -15,10 +15,14 @@ define amdgpu_kernel void @no_reorder_v2f64_global_load_store(<2 x double> addrs ret void } -; SI-LABEL: {{^}}no_reorder_scalarized_v2f64_local_load_store: +; GCN-LABEL: {{^}}no_reorder_scalarized_v2f64_local_load_store: ; SI: ds_read2_b64 ; SI: ds_write2_b64 -; SI: s_endpgm + +; VI: ds_read_b128 +; VI: ds_write_b128 + +; GCN: s_endpgm define amdgpu_kernel void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace(3)* nocapture %x, <2 x double> addrspace(3)* nocapture %y) nounwind { %tmp1 = load <2 x double>, <2 x double> addrspace(3)* %x, align 16 %tmp4 = load <2 x double>, <2 x double> addrspace(3)* %y, align 16 @@ -27,18 +31,18 @@ define amdgpu_kernel void @no_reorder_scalarized_v2f64_local_load_store(<2 x dou ret void } -; SI-LABEL: {{^}}no_reorder_split_v8i32_global_load_store: -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 +; GCN-LABEL: {{^}}no_reorder_split_v8i32_global_load_store: +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 -; SI: buffer_store_dwordx4 -; SI: buffer_store_dwordx4 -; SI: buffer_store_dwordx4 -; SI: buffer_store_dwordx4 -; SI: s_endpgm +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 +; GCN: s_endpgm define amdgpu_kernel void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind { %tmp1 = load <8 x i32>, <8 x i32> addrspace(1)* %x, align 32 %tmp4 = load <8 x i32>, <8 x i32> addrspace(1)* %y, align 32 @@ -47,13 +51,13 @@ define amdgpu_kernel void @no_reorder_split_v8i32_global_load_store(<8 x i32> ad ret void } -; SI-LABEL: {{^}}no_reorder_extload_64: -; SI: ds_read_b64 -; SI: ds_read_b64 -; SI: ds_write_b64 -; SI-NOT: ds_read -; SI: ds_write_b64 -; SI: s_endpgm +; GCN-LABEL: {{^}}no_reorder_extload_64: +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_write_b64 +; GCN-NOT: ds_read +; GCN: ds_write_b64 +; GCN: s_endpgm define amdgpu_kernel void @no_reorder_extload_64(<2 x i32> addrspace(3)* nocapture %x, <2 x i32> addrspace(3)* nocapture %y) nounwind { %tmp1 = load <2 x i32>, <2 x i32> addrspace(3)* %x, align 8 %tmp4 = load <2 x i32>, <2 x i32> addrspace(3)* %y, align 8 diff --git a/llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll b/llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll index c4db4c5..6cecc23 100644 --- a/llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll +++ b/llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=hawaii -enable-amdgpu-aa=0 -verify-machineinstrs -mattr=-promote-alloca,-load-store-opt < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=hawaii -enable-amdgpu-aa=0 -verify-machineinstrs -mattr=-promote-alloca,-load-store-opt,-enable-ds128 < %s | FileCheck -check-prefix=GCN %s @sPrivateStorage = internal addrspace(3) global [256 x [8 x <4 x i64>]] undef diff --git a/llvm/test/CodeGen/AMDGPU/store-local.ll b/llvm/test/CodeGen/AMDGPU/store-local.ll index f0e76ac..f302ea0 100644 --- a/llvm/test/CodeGen/AMDGPU/store-local.ll +++ b/llvm/test/CodeGen/AMDGPU/store-local.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,VI,FUNC %s ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s ; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s ; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cayman < %s | FileCheck -check-prefixes=CM,FUNC %s @@ -156,7 +156,9 @@ entry: ; CM: LDS_WRITE ; CM: LDS_WRITE -; GCN: ds_write2_b64 +; SI: ds_write2_b32 +; VI: ds_write_b128 +; GFX9: ds_write_b128 define amdgpu_kernel void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) { entry: store <4 x i32> %in, <4 x i32> addrspace(3)* %out diff --git a/llvm/test/CodeGen/AMDGPU/store-v3i64.ll b/llvm/test/CodeGen/AMDGPU/store-v3i64.ll index 7af1736..b2e69de 100644 --- a/llvm/test/CodeGen/AMDGPU/store-v3i64.ll +++ b/llvm/test/CodeGen/AMDGPU/store-v3i64.ll @@ -46,8 +46,14 @@ define amdgpu_kernel void @global_store_v3i64_unaligned(<3 x i64> addrspace(1)* } ; GCN-LABEL: {{^}}local_store_v3i64: -; GCN: ds_write2_b64 -; GCN: ds_write_b64 +; SI: ds_write2_b64 +; SI: ds_write_b64 + +; CI: ds_write_b64 +; CI: ds_write_b128 + +; VI: ds_write_b64 +; VI: ds_write_b128 define amdgpu_kernel void @local_store_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> %x) { store <3 x i64> %x, <3 x i64> addrspace(3)* %out, align 32 ret void