define amdgpu_kernel void @add_i3(i3 %a, i3 %b) {
; SI-LABEL: @add_i3(
; SI-NEXT: [[R:%.*]] = add i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @add_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = add i3 %a, %b
define amdgpu_kernel void @add_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @add_nsw_i3(
; SI-NEXT: [[R:%.*]] = add nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @add_nsw_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = add nsw i3 %a, %b
define amdgpu_kernel void @add_nuw_i3(i3 %a, i3 %b) {
; SI-LABEL: @add_nuw_i3(
; SI-NEXT: [[R:%.*]] = add nuw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @add_nuw_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = add nuw i3 %a, %b
define amdgpu_kernel void @add_nuw_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @add_nuw_nsw_i3(
; SI-NEXT: [[R:%.*]] = add nuw nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @add_nuw_nsw_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = add nuw nsw i3 %a, %b
define amdgpu_kernel void @sub_i3(i3 %a, i3 %b) {
; SI-LABEL: @sub_i3(
; SI-NEXT: [[R:%.*]] = sub i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @sub_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = sub i3 %a, %b
define amdgpu_kernel void @sub_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @sub_nsw_i3(
; SI-NEXT: [[R:%.*]] = sub nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nsw_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = sub nsw i3 %a, %b
define amdgpu_kernel void @sub_nuw_i3(i3 %a, i3 %b) {
; SI-LABEL: @sub_nuw_i3(
; SI-NEXT: [[R:%.*]] = sub nuw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nuw_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = sub nuw i3 %a, %b
define amdgpu_kernel void @sub_nuw_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @sub_nuw_nsw_i3(
; SI-NEXT: [[R:%.*]] = sub nuw nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nuw_nsw_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = sub nuw nsw i3 %a, %b
define amdgpu_kernel void @mul_i3(i3 %a, i3 %b) {
; SI-LABEL: @mul_i3(
; SI-NEXT: [[R:%.*]] = mul i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @mul_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = mul i3 %a, %b
define amdgpu_kernel void @mul_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @mul_nsw_i3(
; SI-NEXT: [[R:%.*]] = mul nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nsw_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = mul nsw i3 %a, %b
define amdgpu_kernel void @mul_nuw_i3(i3 %a, i3 %b) {
; SI-LABEL: @mul_nuw_i3(
; SI-NEXT: [[R:%.*]] = mul nuw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nuw_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = mul nuw i3 %a, %b
define amdgpu_kernel void @mul_nuw_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @mul_nuw_nsw_i3(
; SI-NEXT: [[R:%.*]] = mul nuw nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nuw_nsw_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = mul nuw nsw i3 %a, %b
define amdgpu_kernel void @shl_i3(i3 %a, i3 %b) {
; SI-LABEL: @shl_i3(
; SI-NEXT: [[R:%.*]] = shl i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @shl_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = shl i3 %a, %b
define amdgpu_kernel void @shl_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @shl_nsw_i3(
; SI-NEXT: [[R:%.*]] = shl nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nsw_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = shl nsw i3 %a, %b
define amdgpu_kernel void @shl_nuw_i3(i3 %a, i3 %b) {
; SI-LABEL: @shl_nuw_i3(
; SI-NEXT: [[R:%.*]] = shl nuw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = shl nuw i3 %a, %b
define amdgpu_kernel void @shl_nuw_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @shl_nuw_nsw_i3(
; SI-NEXT: [[R:%.*]] = shl nuw nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_nsw_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = shl nuw nsw i3 %a, %b
define amdgpu_kernel void @lshr_i3(i3 %a, i3 %b) {
; SI-LABEL: @lshr_i3(
; SI-NEXT: [[R:%.*]] = lshr i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @lshr_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = lshr i3 %a, %b
define amdgpu_kernel void @lshr_exact_i3(i3 %a, i3 %b) {
; SI-LABEL: @lshr_exact_i3(
; SI-NEXT: [[R:%.*]] = lshr exact i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @lshr_exact_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = lshr exact i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = lshr exact i3 %a, %b
define amdgpu_kernel void @ashr_i3(i3 %a, i3 %b) {
; SI-LABEL: @ashr_i3(
; SI-NEXT: [[R:%.*]] = ashr i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @ashr_i3(
; VI-NEXT: [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = ashr i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = ashr i3 %a, %b
define amdgpu_kernel void @ashr_exact_i3(i3 %a, i3 %b) {
; SI-LABEL: @ashr_exact_i3(
; SI-NEXT: [[R:%.*]] = ashr exact i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @ashr_exact_i3(
; VI-NEXT: [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = ashr exact i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = ashr exact i3 %a, %b
define amdgpu_kernel void @and_i3(i3 %a, i3 %b) {
; SI-LABEL: @and_i3(
; SI-NEXT: [[R:%.*]] = and i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @and_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = and i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = and i3 %a, %b
define amdgpu_kernel void @or_i3(i3 %a, i3 %b) {
; SI-LABEL: @or_i3(
; SI-NEXT: [[R:%.*]] = or i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @or_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = or i3 %a, %b
define amdgpu_kernel void @xor_i3(i3 %a, i3 %b) {
; SI-LABEL: @xor_i3(
; SI-NEXT: [[R:%.*]] = xor i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @xor_i3(
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%r = xor i3 %a, %b
; SI-LABEL: @select_eq_i3(
; SI-NEXT: [[CMP:%.*]] = icmp eq i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @select_eq_i3(
; VI-NEXT: [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%cmp = icmp eq i3 %a, %b
; SI-LABEL: @select_ne_i3(
; SI-NEXT: [[CMP:%.*]] = icmp ne i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @select_ne_i3(
; VI-NEXT: [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%cmp = icmp ne i3 %a, %b
; SI-LABEL: @select_ugt_i3(
; SI-NEXT: [[CMP:%.*]] = icmp ugt i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @select_ugt_i3(
; VI-NEXT: [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%cmp = icmp ugt i3 %a, %b
; SI-LABEL: @select_uge_i3(
; SI-NEXT: [[CMP:%.*]] = icmp uge i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @select_uge_i3(
; VI-NEXT: [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%cmp = icmp uge i3 %a, %b
; SI-LABEL: @select_ult_i3(
; SI-NEXT: [[CMP:%.*]] = icmp ult i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @select_ult_i3(
; VI-NEXT: [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%cmp = icmp ult i3 %a, %b
; SI-LABEL: @select_ule_i3(
; SI-NEXT: [[CMP:%.*]] = icmp ule i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @select_ule_i3(
; VI-NEXT: [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%cmp = icmp ule i3 %a, %b
; SI-LABEL: @select_sgt_i3(
; SI-NEXT: [[CMP:%.*]] = icmp sgt i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @select_sgt_i3(
; VI-NEXT: [[TMP5:%.*]] = sext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%cmp = icmp sgt i3 %a, %b
; SI-LABEL: @select_sge_i3(
; SI-NEXT: [[CMP:%.*]] = icmp sge i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @select_sge_i3(
; VI-NEXT: [[TMP5:%.*]] = sext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%cmp = icmp sge i3 %a, %b
; SI-LABEL: @select_slt_i3(
; SI-NEXT: [[CMP:%.*]] = icmp slt i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @select_slt_i3(
; VI-NEXT: [[TMP5:%.*]] = sext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%cmp = icmp slt i3 %a, %b
; SI-LABEL: @select_sle_i3(
; SI-NEXT: [[CMP:%.*]] = icmp sle i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @select_sle_i3(
; VI-NEXT: [[TMP5:%.*]] = sext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%cmp = icmp sle i3 %a, %b
define amdgpu_kernel void @bitreverse_i3(i3 %a) {
; SI-LABEL: @bitreverse_i3(
; SI-NEXT: [[BREV:%.*]] = call i3 @llvm.bitreverse.i3(i3 [[A:%.*]])
; SI-NEXT: store volatile i3 [[BREV]], i3 addrspace(1)* undef, align 1
; SI-NEXT: ret void
;
; VI-LABEL: @bitreverse_i3(
; VI-NEXT: [[TMP2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP1]])
; VI-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 29
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef, align 1
; VI-NEXT: ret void
;
%brev = call i3 @llvm.bitreverse.i3(i3 %a)
define amdgpu_kernel void @add_i16(i16 %a, i16 %b) {
; SI-LABEL: @add_i16(
; SI-NEXT: [[R:%.*]] = add i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @add_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = add i16 %a, %b
define amdgpu_kernel void @constant_add_i16() {
; SI-LABEL: @constant_add_i16(
; SI-NEXT: [[R:%.*]] = add i16 1, 2
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @constant_add_i16(
; VI-NEXT: store volatile i16 3, i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = add i16 1, 2
define amdgpu_kernel void @constant_add_nsw_i16() {
; SI-LABEL: @constant_add_nsw_i16(
; SI-NEXT: [[R:%.*]] = add nsw i16 1, 2
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @constant_add_nsw_i16(
; VI-NEXT: store volatile i16 3, i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = add nsw i16 1, 2
define amdgpu_kernel void @constant_add_nuw_i16() {
; SI-LABEL: @constant_add_nuw_i16(
; SI-NEXT: [[R:%.*]] = add nsw i16 1, 2
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @constant_add_nuw_i16(
; VI-NEXT: store volatile i16 3, i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = add nsw i16 1, 2
define amdgpu_kernel void @add_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @add_nsw_i16(
; SI-NEXT: [[R:%.*]] = add nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @add_nsw_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = add nsw i16 %a, %b
define amdgpu_kernel void @add_nuw_i16(i16 %a, i16 %b) {
; SI-LABEL: @add_nuw_i16(
; SI-NEXT: [[R:%.*]] = add nuw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @add_nuw_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = add nuw i16 %a, %b
define amdgpu_kernel void @add_nuw_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @add_nuw_nsw_i16(
; SI-NEXT: [[R:%.*]] = add nuw nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @add_nuw_nsw_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = add nuw nsw i16 %a, %b
define amdgpu_kernel void @sub_i16(i16 %a, i16 %b) {
; SI-LABEL: @sub_i16(
; SI-NEXT: [[R:%.*]] = sub i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @sub_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = sub i16 %a, %b
define amdgpu_kernel void @sub_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @sub_nsw_i16(
; SI-NEXT: [[R:%.*]] = sub nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nsw_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = sub nsw i16 %a, %b
define amdgpu_kernel void @sub_nuw_i16(i16 %a, i16 %b) {
; SI-LABEL: @sub_nuw_i16(
; SI-NEXT: [[R:%.*]] = sub nuw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nuw_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = sub nuw i16 %a, %b
define amdgpu_kernel void @sub_nuw_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @sub_nuw_nsw_i16(
; SI-NEXT: [[R:%.*]] = sub nuw nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nuw_nsw_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = sub nuw nsw i16 %a, %b
define amdgpu_kernel void @mul_i16(i16 %a, i16 %b) {
; SI-LABEL: @mul_i16(
; SI-NEXT: [[R:%.*]] = mul i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @mul_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = mul i16 %a, %b
define amdgpu_kernel void @mul_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @mul_nsw_i16(
; SI-NEXT: [[R:%.*]] = mul nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nsw_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = mul nsw i16 %a, %b
define amdgpu_kernel void @mul_nuw_i16(i16 %a, i16 %b) {
; SI-LABEL: @mul_nuw_i16(
; SI-NEXT: [[R:%.*]] = mul nuw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nuw_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = mul nuw i16 %a, %b
define amdgpu_kernel void @mul_nuw_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @mul_nuw_nsw_i16(
; SI-NEXT: [[R:%.*]] = mul nuw nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nuw_nsw_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = mul nuw nsw i16 %a, %b
define amdgpu_kernel void @shl_i16(i16 %a, i16 %b) {
; SI-LABEL: @shl_i16(
; SI-NEXT: [[R:%.*]] = shl i16 [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @shl_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
-; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = shl i16 %a, %b
define amdgpu_kernel void @shl_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @shl_nsw_i16(
; SI-NEXT: [[R:%.*]] = shl nsw i16 [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nsw_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
-; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = shl nsw i16 %a, %b
define amdgpu_kernel void @shl_nuw_i16(i16 %a, i16 %b) {
; SI-LABEL: @shl_nuw_i16(
; SI-NEXT: [[R:%.*]] = shl nuw i16 [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
-; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = shl nuw i16 %a, %b
define amdgpu_kernel void @shl_nuw_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @shl_nuw_nsw_i16(
; SI-NEXT: [[R:%.*]] = shl nuw nsw i16 [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_nsw_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
-; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = shl nuw nsw i16 %a, %b
define amdgpu_kernel void @lshr_i16(i16 %a, i16 %b) {
; SI-LABEL: @lshr_i16(
; SI-NEXT: [[R:%.*]] = lshr i16 [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @lshr_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
-; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = lshr i16 %a, %b
define amdgpu_kernel void @lshr_exact_i16(i16 %a, i16 %b) {
; SI-LABEL: @lshr_exact_i16(
; SI-NEXT: [[R:%.*]] = lshr exact i16 [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @lshr_exact_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = lshr exact i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
-; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = lshr exact i16 %a, %b
define amdgpu_kernel void @ashr_i16(i16 %a, i16 %b) {
; SI-LABEL: @ashr_i16(
; SI-NEXT: [[R:%.*]] = ashr i16 [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @ashr_i16(
; VI-NEXT: [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = ashr i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
-; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = ashr i16 %a, %b
define amdgpu_kernel void @ashr_exact_i16(i16 %a, i16 %b) {
; SI-LABEL: @ashr_exact_i16(
; SI-NEXT: [[R:%.*]] = ashr exact i16 [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @ashr_exact_i16(
; VI-NEXT: [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = ashr exact i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
-; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = ashr exact i16 %a, %b
define amdgpu_kernel void @constant_lshr_exact_i16(i16 %a, i16 %b) {
; SI-LABEL: @constant_lshr_exact_i16(
; SI-NEXT: [[R:%.*]] = lshr exact i16 4, 1
-; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @constant_lshr_exact_i16(
-; VI-NEXT: store volatile i16 2, i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 2, i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = lshr exact i16 4, 1
define amdgpu_kernel void @and_i16(i16 %a, i16 %b) {
; SI-LABEL: @and_i16(
; SI-NEXT: [[R:%.*]] = and i16 [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @and_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = and i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
-; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = and i16 %a, %b
define amdgpu_kernel void @or_i16(i16 %a, i16 %b) {
; SI-LABEL: @or_i16(
; SI-NEXT: [[R:%.*]] = or i16 [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @or_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
-; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = or i16 %a, %b
define amdgpu_kernel void @xor_i16(i16 %a, i16 %b) {
; SI-LABEL: @xor_i16(
; SI-NEXT: [[R:%.*]] = xor i16 [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @xor_i16(
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
-; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%r = xor i16 %a, %b
; SI-LABEL: @select_eq_i16(
; SI-NEXT: [[CMP:%.*]] = icmp eq i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
-; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @select_eq_i16(
; VI-NEXT: [[TMP5:%.*]] = zext i16 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
-; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%cmp = icmp eq i16 %a, %b
; SI-LABEL: @select_ne_i16(
; SI-NEXT: [[CMP:%.*]] = icmp ne i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
-; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @select_ne_i16(
; VI-NEXT: [[TMP5:%.*]] = zext i16 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
-; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%cmp = icmp ne i16 %a, %b
; SI-LABEL: @select_ugt_i16(
; SI-NEXT: [[CMP:%.*]] = icmp ugt i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
-; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @select_ugt_i16(
; VI-NEXT: [[TMP5:%.*]] = zext i16 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
-; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%cmp = icmp ugt i16 %a, %b
; SI-LABEL: @select_uge_i16(
; SI-NEXT: [[CMP:%.*]] = icmp uge i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
-; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @select_uge_i16(
; VI-NEXT: [[TMP5:%.*]] = zext i16 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
-; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%cmp = icmp uge i16 %a, %b
; SI-LABEL: @select_ult_i16(
; SI-NEXT: [[CMP:%.*]] = icmp ult i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
-; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @select_ult_i16(
; VI-NEXT: [[TMP5:%.*]] = zext i16 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
-; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%cmp = icmp ult i16 %a, %b
; SI-LABEL: @select_ule_i16(
; SI-NEXT: [[CMP:%.*]] = icmp ule i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
-; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @select_ule_i16(
; VI-NEXT: [[TMP5:%.*]] = zext i16 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
-; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%cmp = icmp ule i16 %a, %b
; SI-LABEL: @select_sgt_i16(
; SI-NEXT: [[CMP:%.*]] = icmp sgt i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
-; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @select_sgt_i16(
; VI-NEXT: [[TMP5:%.*]] = sext i16 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
-; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%cmp = icmp sgt i16 %a, %b
; SI-LABEL: @select_sge_i16(
; SI-NEXT: [[CMP:%.*]] = icmp sge i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
-; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @select_sge_i16(
; VI-NEXT: [[TMP5:%.*]] = sext i16 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
-; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%cmp = icmp sge i16 %a, %b
; SI-LABEL: @select_slt_i16(
; SI-NEXT: [[CMP:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
-; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @select_slt_i16(
; VI-NEXT: [[TMP5:%.*]] = sext i16 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
-; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%cmp = icmp slt i16 %a, %b
; SI-LABEL: @select_sle_i16(
; SI-NEXT: [[CMP:%.*]] = icmp sle i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]]
-; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @select_sle_i16(
; VI-NEXT: [[TMP5:%.*]] = sext i16 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
-; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%cmp = icmp sle i16 %a, %b
define amdgpu_kernel void @bitreverse_i16(i16 %a) {
; SI-LABEL: @bitreverse_i16(
; SI-NEXT: [[BREV:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[A:%.*]])
-; SI-NEXT: store volatile i16 [[BREV]], i16 addrspace(1)* undef
+; SI-NEXT: store volatile i16 [[BREV]], i16 addrspace(1)* undef, align 2
; SI-NEXT: ret void
;
; VI-LABEL: @bitreverse_i16(
; VI-NEXT: [[TMP2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP1]])
; VI-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 16
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
-; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
+; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef, align 2
; VI-NEXT: ret void
;
%brev = call i16 @llvm.bitreverse.i16(i16 %a)
define amdgpu_kernel void @add_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @add_3xi15(
; SI-NEXT: [[R:%.*]] = add <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @add_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = add <3 x i15> %a, %b
define amdgpu_kernel void @add_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @add_nsw_3xi15(
; SI-NEXT: [[R:%.*]] = add nsw <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @add_nsw_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = add nsw <3 x i15> %a, %b
define amdgpu_kernel void @add_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @add_nuw_3xi15(
; SI-NEXT: [[R:%.*]] = add nuw <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @add_nuw_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = add nuw <3 x i15> %a, %b
define amdgpu_kernel void @add_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @add_nuw_nsw_3xi15(
; SI-NEXT: [[R:%.*]] = add nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @add_nuw_nsw_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = add nuw nsw <3 x i15> %a, %b
define amdgpu_kernel void @sub_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @sub_3xi15(
; SI-NEXT: [[R:%.*]] = sub <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @sub_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = sub <3 x i15> %a, %b
define amdgpu_kernel void @sub_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @sub_nsw_3xi15(
; SI-NEXT: [[R:%.*]] = sub nsw <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nsw_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = sub nsw <3 x i15> %a, %b
define amdgpu_kernel void @sub_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @sub_nuw_3xi15(
; SI-NEXT: [[R:%.*]] = sub nuw <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nuw_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = sub nuw <3 x i15> %a, %b
define amdgpu_kernel void @sub_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @sub_nuw_nsw_3xi15(
; SI-NEXT: [[R:%.*]] = sub nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nuw_nsw_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = sub nuw nsw <3 x i15> %a, %b
define amdgpu_kernel void @mul_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @mul_3xi15(
; SI-NEXT: [[R:%.*]] = mul <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @mul_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = mul <3 x i15> %a, %b
define amdgpu_kernel void @mul_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @mul_nsw_3xi15(
; SI-NEXT: [[R:%.*]] = mul nsw <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nsw_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = mul nsw <3 x i15> %a, %b
define amdgpu_kernel void @mul_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @mul_nuw_3xi15(
; SI-NEXT: [[R:%.*]] = mul nuw <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nuw_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = mul nuw <3 x i15> %a, %b
define amdgpu_kernel void @mul_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @mul_nuw_nsw_3xi15(
; SI-NEXT: [[R:%.*]] = mul nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nuw_nsw_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = mul nuw nsw <3 x i15> %a, %b
define amdgpu_kernel void @shl_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @shl_3xi15(
; SI-NEXT: [[R:%.*]] = shl <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = shl <3 x i15> %a, %b
define amdgpu_kernel void @shl_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @shl_nsw_3xi15(
; SI-NEXT: [[R:%.*]] = shl nsw <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nsw_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = shl nsw <3 x i15> %a, %b
define amdgpu_kernel void @shl_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @shl_nuw_3xi15(
; SI-NEXT: [[R:%.*]] = shl nuw <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = shl nuw <3 x i15> %a, %b
define amdgpu_kernel void @shl_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @shl_nuw_nsw_3xi15(
; SI-NEXT: [[R:%.*]] = shl nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_nsw_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = shl nuw nsw <3 x i15> %a, %b
define amdgpu_kernel void @lshr_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @lshr_3xi15(
; SI-NEXT: [[R:%.*]] = lshr <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @lshr_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = lshr <3 x i15> %a, %b
define amdgpu_kernel void @lshr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @lshr_exact_3xi15(
; SI-NEXT: [[R:%.*]] = lshr exact <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @lshr_exact_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = lshr exact <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = lshr exact <3 x i15> %a, %b
define amdgpu_kernel void @ashr_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @ashr_3xi15(
; SI-NEXT: [[R:%.*]] = ashr <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @ashr_3xi15(
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = ashr <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = ashr <3 x i15> %a, %b
define amdgpu_kernel void @ashr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @ashr_exact_3xi15(
; SI-NEXT: [[R:%.*]] = ashr exact <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @ashr_exact_3xi15(
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = ashr exact <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = ashr exact <3 x i15> %a, %b
define amdgpu_kernel void @and_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @and_3xi15(
; SI-NEXT: [[R:%.*]] = and <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @and_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = and <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = and <3 x i15> %a, %b
define amdgpu_kernel void @or_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @or_3xi15(
; SI-NEXT: [[R:%.*]] = or <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @or_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = or <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = or <3 x i15> %a, %b
define amdgpu_kernel void @xor_3xi15(<3 x i15> %a, <3 x i15> %b) {
; SI-LABEL: @xor_3xi15(
; SI-NEXT: [[R:%.*]] = xor <3 x i15> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @xor_3xi15(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = xor <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = xor <3 x i15> %a, %b
; SI-LABEL: @select_eq_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp eq <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
-; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_eq_3xi15(
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp eq <3 x i15> %a, %b
; SI-LABEL: @select_ne_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp ne <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
-; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ne_3xi15(
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp ne <3 x i15> %a, %b
; SI-LABEL: @select_ugt_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp ugt <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
-; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ugt_3xi15(
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp ugt <3 x i15> %a, %b
; SI-LABEL: @select_uge_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp uge <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
-; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_uge_3xi15(
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp uge <3 x i15> %a, %b
; SI-LABEL: @select_ult_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp ult <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
-; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ult_3xi15(
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp ult <3 x i15> %a, %b
; SI-LABEL: @select_ule_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp ule <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
-; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ule_3xi15(
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp ule <3 x i15> %a, %b
; SI-LABEL: @select_sgt_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp sgt <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
-; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_sgt_3xi15(
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp sgt <3 x i15> %a, %b
; SI-LABEL: @select_sge_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp sge <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
-; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_sge_3xi15(
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp sge <3 x i15> %a, %b
; SI-LABEL: @select_slt_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp slt <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
-; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_slt_3xi15(
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp slt <3 x i15> %a, %b
; SI-LABEL: @select_sle_3xi15(
; SI-NEXT: [[CMP:%.*]] = icmp sle <3 x i15> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]]
-; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_sle_3xi15(
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp sle <3 x i15> %a, %b
define amdgpu_kernel void @bitreverse_3xi15(<3 x i15> %a) {
; SI-LABEL: @bitreverse_3xi15(
; SI-NEXT: [[BREV:%.*]] = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> [[A:%.*]])
-; SI-NEXT: store volatile <3 x i15> [[BREV]], <3 x i15> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i15> [[BREV]], <3 x i15> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @bitreverse_3xi15(
; VI-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> [[TMP1]])
; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], <i32 17, i32 17, i32 17>
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
-; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%brev = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> %a)
define amdgpu_kernel void @add_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @add_3xi16(
; SI-NEXT: [[R:%.*]] = add <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @add_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = add <3 x i16> %a, %b
define amdgpu_kernel void @add_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @add_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = add nsw <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @add_nsw_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = add nsw <3 x i16> %a, %b
define amdgpu_kernel void @add_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @add_nuw_3xi16(
; SI-NEXT: [[R:%.*]] = add nuw <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @add_nuw_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = add nuw <3 x i16> %a, %b
define amdgpu_kernel void @add_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @add_nuw_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = add nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @add_nuw_nsw_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = add nuw nsw <3 x i16> %a, %b
define amdgpu_kernel void @sub_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @sub_3xi16(
; SI-NEXT: [[R:%.*]] = sub <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @sub_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = sub <3 x i16> %a, %b
define amdgpu_kernel void @sub_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @sub_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = sub nsw <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nsw_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = sub nsw <3 x i16> %a, %b
define amdgpu_kernel void @sub_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @sub_nuw_3xi16(
; SI-NEXT: [[R:%.*]] = sub nuw <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nuw_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = sub nuw <3 x i16> %a, %b
define amdgpu_kernel void @sub_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @sub_nuw_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = sub nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nuw_nsw_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = sub nuw nsw <3 x i16> %a, %b
define amdgpu_kernel void @mul_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @mul_3xi16(
; SI-NEXT: [[R:%.*]] = mul <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @mul_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = mul <3 x i16> %a, %b
define amdgpu_kernel void @mul_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @mul_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = mul nsw <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nsw_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = mul nsw <3 x i16> %a, %b
define amdgpu_kernel void @mul_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @mul_nuw_3xi16(
; SI-NEXT: [[R:%.*]] = mul nuw <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nuw_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = mul nuw <3 x i16> %a, %b
define amdgpu_kernel void @mul_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @mul_nuw_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = mul nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nuw_nsw_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = mul nuw nsw <3 x i16> %a, %b
define amdgpu_kernel void @shl_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @shl_3xi16(
; SI-NEXT: [[R:%.*]] = shl <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = shl <3 x i16> %a, %b
define amdgpu_kernel void @shl_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @shl_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = shl nsw <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nsw_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = shl nsw <3 x i16> %a, %b
define amdgpu_kernel void @shl_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @shl_nuw_3xi16(
; SI-NEXT: [[R:%.*]] = shl nuw <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = shl nuw <3 x i16> %a, %b
define amdgpu_kernel void @shl_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @shl_nuw_nsw_3xi16(
; SI-NEXT: [[R:%.*]] = shl nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_nsw_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = shl nuw nsw <3 x i16> %a, %b
define amdgpu_kernel void @lshr_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @lshr_3xi16(
; SI-NEXT: [[R:%.*]] = lshr <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @lshr_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = lshr <3 x i16> %a, %b
define amdgpu_kernel void @lshr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @lshr_exact_3xi16(
; SI-NEXT: [[R:%.*]] = lshr exact <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @lshr_exact_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = lshr exact <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = lshr exact <3 x i16> %a, %b
define amdgpu_kernel void @ashr_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @ashr_3xi16(
; SI-NEXT: [[R:%.*]] = ashr <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @ashr_3xi16(
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = ashr <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = ashr <3 x i16> %a, %b
define amdgpu_kernel void @ashr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @ashr_exact_3xi16(
; SI-NEXT: [[R:%.*]] = ashr exact <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @ashr_exact_3xi16(
; VI-NEXT: [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = ashr exact <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = ashr exact <3 x i16> %a, %b
define amdgpu_kernel void @and_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @and_3xi16(
; SI-NEXT: [[R:%.*]] = and <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @and_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = and <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = and <3 x i16> %a, %b
define amdgpu_kernel void @or_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @or_3xi16(
; SI-NEXT: [[R:%.*]] = or <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @or_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = or <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = or <3 x i16> %a, %b
define amdgpu_kernel void @xor_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @xor_3xi16(
; SI-NEXT: [[R:%.*]] = xor <3 x i16> [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @xor_3xi16(
; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT: [[TMP3:%.*]] = xor <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%r = xor <3 x i16> %a, %b
; SI-LABEL: @select_eq_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp eq <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
-; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_eq_3xi16(
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp eq <3 x i16> %a, %b
; SI-LABEL: @select_ne_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp ne <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
-; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ne_3xi16(
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp ne <3 x i16> %a, %b
; SI-LABEL: @select_ugt_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp ugt <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
-; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ugt_3xi16(
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp ugt <3 x i16> %a, %b
; SI-LABEL: @select_uge_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp uge <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
-; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_uge_3xi16(
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp uge <3 x i16> %a, %b
; SI-LABEL: @select_ult_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp ult <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
-; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ult_3xi16(
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp ult <3 x i16> %a, %b
; SI-LABEL: @select_ule_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp ule <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
-; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_ule_3xi16(
; VI-NEXT: [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp ule <3 x i16> %a, %b
; SI-LABEL: @select_sgt_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp sgt <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
-; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_sgt_3xi16(
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp sgt <3 x i16> %a, %b
; SI-LABEL: @select_sge_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp sge <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
-; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_sge_3xi16(
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp sge <3 x i16> %a, %b
; SI-LABEL: @select_slt_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp slt <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
-; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_slt_3xi16(
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp slt <3 x i16> %a, %b
; SI-LABEL: @select_sle_3xi16(
; SI-NEXT: [[CMP:%.*]] = icmp sle <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
-; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @select_sle_3xi16(
; VI-NEXT: [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%cmp = icmp sle <3 x i16> %a, %b
define amdgpu_kernel void @bitreverse_3xi16(<3 x i16> %a) {
; SI-LABEL: @bitreverse_3xi16(
; SI-NEXT: [[BREV:%.*]] = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> [[A:%.*]])
-; SI-NEXT: store volatile <3 x i16> [[BREV]], <3 x i16> addrspace(1)* undef
+; SI-NEXT: store volatile <3 x i16> [[BREV]], <3 x i16> addrspace(1)* undef, align 8
; SI-NEXT: ret void
;
; VI-LABEL: @bitreverse_3xi16(
; VI-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> [[TMP1]])
; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], <i32 16, i32 16, i32 16>
; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
-; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
+; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef, align 8
; VI-NEXT: ret void
;
%brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a)
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: urem_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: sdiv_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s4, v0
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: srem_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX6-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc
; GFX6-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0
; GFX6-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: urem_i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: buffer_store_short v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: sdiv_i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0
; GFX6-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: srem_i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
; GFX6-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc
; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0
; GFX6-NEXT: buffer_store_byte v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: urem_i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: sdiv_i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: srem_i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
; GFX6-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[12:15], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_v4i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: urem_v4i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34
; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s2, v3
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: sdiv_v4i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34
; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s5, v3
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: srem_v4i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: urem_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: sdiv_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: srem_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_and_b32_e32 v0, 7, v0
; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_i3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_and_b32_e32 v0, 7, v0
; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: urem_i3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c
; GFX6-NEXT: v_and_b32_e32 v0, 7, v0
; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: sdiv_i3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_and_b32_e32 v0, 7, v0
; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: srem_i3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
; GFX6-NEXT: buffer_store_short v2, off, s[4:7], 0 offset:4
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_v3i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: buffer_store_short v2, off, s[4:7], 0 offset:4
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: urem_v3i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: buffer_store_short v2, off, s[4:7], 0 offset:4
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: sdiv_v3i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: buffer_store_short v2, off, s[4:7], 0 offset:4
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: srem_v3i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX6-NEXT: v_and_b32_e32 v0, 0x1fff, v1
; GFX6-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_v3i15:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_and_b32_e32 v0, 0x1fff, v1
; GFX6-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: urem_v3i15:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_and_b32_e32 v0, 0x1fff, v1
; GFX6-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: sdiv_v3i15:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_and_b32_e32 v0, 0x1fff, v1
; GFX6-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: srem_v3i15:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 20, v0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_i32_oddk_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_i32_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_i32_pow2_shl_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_v2i32_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_v2i32_mixed_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_v2i32_pow2_shl_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: urem_i32_oddk_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: urem_i32_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: urem_i32_pow2_shl_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: urem_v2i32_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: urem_v2i32_pow2_shl_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: sdiv_i32_oddk_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: sdiv_i32_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s4, v0
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: sdiv_i32_pow2_shl_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: sdiv_v2i32_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: ssdiv_v2i32_mixed_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, s2, v1
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: sdiv_v2i32_pow2_shl_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: srem_i32_oddk_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: srem_i32_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s4, v0
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: srem_i32_pow2_shl_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: srem_v2i32_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, s0, v1
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: srem_v2i32_pow2_shl_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_i64_oddk_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0x4f176a73
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_i64_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_i64_pow2_shl_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX6-NEXT: v_mov_b32_e32 v3, s3
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_v2i64_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_mov_b32_e32 v1, s3
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_v2i64_mixed_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0x4f800000
; GFX6-NEXT: v_mov_b32_e32 v3, s3
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: udiv_v2i64_pow2_shl_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: urem_i64_oddk_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0x4f1761f8
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: urem_i64_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: urem_i64_pow2_shl_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX6-NEXT: v_mov_b32_e32 v3, v1
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: urem_v2i64_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_mov_b32_e32 v3, s3
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: urem_v2i64_pow2_shl_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: sdiv_i64_oddk_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0x4f800000
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: sdiv_i64_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: sdiv_i64_pow2_shl_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s4, s[0:1], 0x34
; GFX6-NEXT: v_mov_b32_e32 v3, s3
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: sdiv_v2i64_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_mov_b32_e32 v1, s3
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: ssdiv_v2i64_mixed_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0x457ff000
; GFX6-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: sdiv_v2i64_pow2_shl_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x44
; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: srem_i64_oddk_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0x4f800000
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: srem_i64_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: srem_i64_pow2_shl_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s4, s[0:1], 0x34
; GFX6-NEXT: v_mov_b32_e32 v3, s3
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: srem_v2i64_pow2k_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX6-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
-;
; GFX9-LABEL: srem_v2i64_pow2_shl_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x44