From: David Green Date: Tue, 3 Sep 2019 09:42:16 +0000 (+0000) Subject: [ARM] More MVE load/store tests for offsets around the negative limit. NFC X-Git-Tag: llvmorg-11-init~10206 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=855caf2335c0837666f8174d79e4df48b40dcb12;p=platform%2Fupstream%2Fllvm.git [ARM] More MVE load/store tests for offsets around the negative limit. NFC llvm-svn: 370726 --- diff --git a/llvm/test/CodeGen/Thumb2/mve-ldst-offset.ll b/llvm/test/CodeGen/Thumb2/mve-ldst-offset.ll index 5a0a605..8e4f517 100644 --- a/llvm/test/CodeGen/Thumb2/mve-ldst-offset.ll +++ b/llvm/test/CodeGen/Thumb2/mve-ldst-offset.ll @@ -196,6 +196,40 @@ entry: ret i8* %x } +define i8* @ldrhu32_m254(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r2, r0, #254 +; CHECK-NEXT: vldrh.u32 q0, [r2] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %z to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhu32_m256(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r2, r0, #256 +; CHECK-NEXT: vldrh.u32 q0, [r2] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + define i8* @ldrhs32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhs32_4: @@ -280,6 +314,40 @@ entry: ret i8* %x } +define i8* @ldrhs32_m254(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r2, r0, #254 +; CHECK-NEXT: vldrh.s32 q0, [r2] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %z to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhs32_m256(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r2, r0, #256 +; CHECK-NEXT: vldrh.s32 q0, [r2] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + define i8* @ldrhu16_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhu16_4: @@ -359,6 +427,38 @@ entry: ret i8* %x } +define i8* @ldrhu16_m254(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r2, r0, #254 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %z to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhu16_m256(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r2, r0, #256 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + define i8* @ldrbu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu32_4: @@ -426,6 +526,40 @@ entry: ret i8* %x } +define i8* @ldrbu32_m127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r2, r0, #127 +; CHECK-NEXT: vldrb.u32 q0, [r2] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <4 x i8>* + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbu32_m128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r2, r0, #128 +; CHECK-NEXT: vldrb.u32 q0, [r2] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <4 x i8>* + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + define i8* @ldrbs32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbs32_4: @@ -493,6 +627,40 @@ entry: ret i8* %x } +define i8* @ldrbs32_m127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r2, r0, #127 +; CHECK-NEXT: vldrb.s32 q0, [r2] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <4 x i8>* + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbs32_m128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r2, r0, #128 +; CHECK-NEXT: vldrb.s32 q0, [r2] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <4 x i8>* + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + define i8* @ldrbu16_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu16_4: @@ -560,6 +728,40 @@ entry: ret i8* %x } +define i8* @ldrbu16_m127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r2, r0, #127 +; CHECK-NEXT: vldrb.u16 q0, [r2] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbu16_m128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r2, r0, #128 +; CHECK-NEXT: vldrb.u16 q0, [r2] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + define i8* @ldrbs16_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbs16_4: @@ -627,6 +829,40 @@ entry: ret i8* %x } +define i8* @ldrbs16_m127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r2, r0, #127 +; CHECK-NEXT: vldrb.s16 q0, [r2] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbs16_m128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r2, r0, #128 +; CHECK-NEXT: vldrb.s16 q0, [r2] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + define i8* @ldrbu8_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu8_4: @@ -690,6 +926,39 @@ entry: ret i8* %x } +define i8* @ldrbu8_m127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu8_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r2, r0, #127 +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <16 x i8>* + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %x +} + +define i8* @ldrbu8_m128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu8_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r2, r0, #128 +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <16 x i8>* + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %x +} + + define i8* @ldrwf32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwf32_4: ; CHECK: @ %bb.0: @ %entry @@ -1045,6 +1314,38 @@ entry: ret i8* %y } +define i8* @strh32_m254(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #254 +; CHECK-NEXT: vstrh.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + store <4 x i16> %1, <4 x i16>* %2, align 2 + ret i8* %y +} + +define i8* @strh32_m256(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #256 +; CHECK-NEXT: vstrh.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + store <4 x i16> %1, <4 x i16>* %2, align 2 + ret i8* %y +} + define i8* @strh16_4(i8* %y, i8* %x) { ; CHECK-LABEL: strh16_4: @@ -1124,6 +1425,38 @@ entry: ret i8* %y } +define i8* @strh16_m254(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #254 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %y +} + +define i8* @strh16_m256(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #256 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %y +} + define i8* @strb32_4(i8* %y, i8* %x) { ; CHECK-LABEL: strb32_4: @@ -1187,6 +1520,38 @@ entry: ret i8* %y } +define i8* @strb32_m127(i8* %y, i8* %x) { +; CHECK-LABEL: strb32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #127 +; CHECK-NEXT: vstrb.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <4 x i8>* + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + store <4 x i8> %1, <4 x i8>* %2, align 1 + ret i8* %y +} + +define i8* @strb32_m128(i8* %y, i8* %x) { +; CHECK-LABEL: strb32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #128 +; CHECK-NEXT: vstrb.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <4 x i8>* + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + store <4 x i8> %1, <4 x i8>* %2, align 1 + ret i8* %y +} + define i8* @strb16_4(i8* %y, i8* %x) { ; CHECK-LABEL: strb16_4: @@ -1250,6 +1615,38 @@ entry: ret i8* %y } +define i8* @strb16_m127(i8* %y, i8* %x) { +; CHECK-LABEL: strb16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #127 +; CHECK-NEXT: vstrb.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + store <8 x i8> %1, <8 x i8>* %2, align 1 + ret i8* %y +} + +define i8* @strb16_m128(i8* %y, i8* %x) { +; CHECK-LABEL: strb16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #128 +; CHECK-NEXT: vstrb.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + store <8 x i8> %1, <8 x i8>* %2, align 1 + ret i8* %y +} + define i8* @strb8_4(i8* %y, i8* %x) { ; CHECK-LABEL: strb8_4: @@ -1313,6 +1710,39 @@ entry: ret i8* %y } +define i8* @strb8_m127(i8* %y, i8* %x) { +; CHECK-LABEL: strb8_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #127 +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <16 x i8>* + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %y +} + +define i8* @strb8_m128(i8* %y, i8* %x) { +; CHECK-LABEL: strb8_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #128 +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <16 x i8>* + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %y +} + + define i8* @strf32_4(i8* %y, i8* %x) { ; CHECK-LABEL: strf32_4: ; CHECK: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll b/llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll index 61afa72..d84724a 100644 --- a/llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll +++ b/llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll @@ -199,6 +199,39 @@ entry: ret i8* %z } +define i8* @ldrhu32_m254(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r0], #-254 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %x to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhu32_m256(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %x to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + define i8* @ldrhs32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhs32_4: @@ -282,6 +315,39 @@ entry: ret i8* %z } +define i8* @ldrhs32_m254(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.s32 q0, [r0], #-254 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %x to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhs32_m256(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.s32 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %x to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + define i8* @ldrhu16_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhu16_4: @@ -366,6 +432,37 @@ entry: ret i8* %z } +define i8* @ldrhu16_m254(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r0], #-254 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_m256(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + define i8* @ldrbu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu32_4: @@ -432,6 +529,39 @@ entry: ret i8* %z } +define i8* @ldrbu32_m127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r0], #-127 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %x to <4 x i8>* + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_m128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %x to <4 x i8>* + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + define i8* @ldrbs32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbs32_4: @@ -498,6 +628,39 @@ entry: ret i8* %z } +define i8* @ldrbs32_m127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.s32 q0, [r0], #-127 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %x to <4 x i8>* + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_m128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.s32 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %x to <4 x i8>* + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + define i8* @ldrbu16_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu16_4: @@ -564,6 +727,39 @@ entry: ret i8* %z } +define i8* @ldrbu16_m127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r0], #-127 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %x to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_m128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %x to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + define i8* @ldrbs16_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbs16_4: @@ -630,6 +826,39 @@ entry: ret i8* %z } +define i8* @ldrbs16_m127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.s16 q0, [r0], #-127 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %x to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_m128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.s16 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %x to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + define i8* @ldrbu8_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu8_4: @@ -692,6 +921,38 @@ entry: ret i8* %z } +define i8* @ldrbu8_m127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu8_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r0], #-127 +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %x to <16 x i8>* + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_m128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu8_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %x to <16 x i8>* + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + + define i8* @ldrwf32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwf32_4: ; CHECK: @ %bb.0: @ %entry @@ -1067,6 +1328,37 @@ entry: ret i8* %z } +define i8* @strh32_m254(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vstrh.32 q0, [r0], #-254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %y to <4 x i16>* + store <4 x i16> %1, <4 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @strh32_m256(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vstrh.32 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %y to <4 x i16>* + store <4 x i16> %1, <4 x i16>* %2, align 2 + ret i8* %z +} + define i8* @strh16_4(i8* %y, i8* %x) { ; CHECK-LE-LABEL: strh16_4: @@ -1163,6 +1455,37 @@ entry: ret i8* %z } +define i8* @strh16_m254(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0], #-254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @strh16_m256(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + define i8* @strb32_4(i8* %y, i8* %x) { ; CHECK-LABEL: strb32_4: @@ -1225,6 +1548,37 @@ entry: ret i8* %z } +define i8* @strb32_m127(i8* %y, i8* %x) { +; CHECK-LABEL: strb32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vstrb.32 q0, [r0], #-127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <4 x i8>* + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %y to <4 x i8>* + store <4 x i8> %1, <4 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @strb32_m128(i8* %y, i8* %x) { +; CHECK-LABEL: strb32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vstrb.32 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <4 x i8>* + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %y to <4 x i8>* + store <4 x i8> %1, <4 x i8>* %2, align 1 + ret i8* %z +} + define i8* @strb16_4(i8* %y, i8* %x) { ; CHECK-LABEL: strb16_4: @@ -1287,6 +1641,37 @@ entry: ret i8* %z } +define i8* @strb16_m127(i8* %y, i8* %x) { +; CHECK-LABEL: strb16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vstrb.16 q0, [r0], #-127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %y to <8 x i8>* + store <8 x i8> %1, <8 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @strb16_m128(i8* %y, i8* %x) { +; CHECK-LABEL: strb16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vstrb.16 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %y to <8 x i8>* + store <8 x i8> %1, <8 x i8>* %2, align 1 + ret i8* %z +} + define i8* @strb8_4(i8* %y, i8* %x) { ; CHECK-LABEL: strb8_4: @@ -1349,6 +1734,38 @@ entry: ret i8* %z } +define i8* @strb8_m127(i8* %y, i8* %x) { +; CHECK-LABEL: strb8_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0], #-127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <16 x i8>* + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @strb8_m128(i8* %y, i8* %x) { +; CHECK-LABEL: strb8_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <16 x i8>* + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + + define i8* @strf32_4(i8* %y, i8* %x) { ; CHECK-LE-LABEL: strf32_4: ; CHECK-LE: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll b/llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll index ca1731a..bf732c6 100644 --- a/llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll +++ b/llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll @@ -199,6 +199,39 @@ entry: ret i8* %z } +define i8* @ldrhu32_m254(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r0, #-254]! +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %z to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhu32_m256(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vldrh.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + define i8* @ldrhs32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhs32_4: @@ -282,6 +315,39 @@ entry: ret i8* %z } +define i8* @ldrhs32_m254(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.s32 q0, [r0, #-254]! +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %z to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhs32_m256(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhs32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vldrh.s32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + define i8* @ldrhu16_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhu16_4: @@ -366,6 +432,37 @@ entry: ret i8* %z } +define i8* @ldrhu16_m254(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r0, #-254]! +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %z to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_m256(i8* %x, i8* %y) { +; CHECK-LABEL: ldrhu16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vldrh.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + define i8* @ldrbu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu32_4: @@ -432,6 +529,39 @@ entry: ret i8* %z } +define i8* @ldrbu32_m127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r0, #-127]! +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <4 x i8>* + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_m128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrb.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <4 x i8>* + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + define i8* @ldrbs32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbs32_4: @@ -498,6 +628,39 @@ entry: ret i8* %z } +define i8* @ldrbs32_m127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.s32 q0, [r0, #-127]! +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <4 x i8>* + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_m128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrb.s32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <4 x i8>* + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + define i8* @ldrbu16_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu16_4: @@ -564,6 +727,39 @@ entry: ret i8* %z } +define i8* @ldrbu16_m127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r0, #-127]! +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_m128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrb.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + define i8* @ldrbs16_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbs16_4: @@ -630,6 +826,39 @@ entry: ret i8* %z } +define i8* @ldrbs16_m127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.s16 q0, [r0, #-127]! +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_m128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbs16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrb.s16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + define i8* @ldrbu8_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu8_4: @@ -692,6 +921,38 @@ entry: ret i8* %z } +define i8* @ldrbu8_m127(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu8_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r0, #-127]! +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <16 x i8>* + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_m128(i8* %x, i8* %y) { +; CHECK-LABEL: ldrbu8_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrb.u8 q0, [r0] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <16 x i8>* + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + + define i8* @ldrwf32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwf32_4: ; CHECK: @ %bb.0: @ %entry @@ -1067,6 +1328,37 @@ entry: ret i8* %z } +define i8* @strh32_m254(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vstrh.32 q0, [r0, #-254]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + store <4 x i16> %1, <4 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @strh32_m256(i8* %y, i8* %x) { +; CHECK-LABEL: strh32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vstrh.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <4 x i16>* + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + store <4 x i16> %1, <4 x i16>* %2, align 2 + ret i8* %z +} + define i8* @strh16_4(i8* %y, i8* %x) { ; CHECK-LE-LABEL: strh16_4: @@ -1163,6 +1455,37 @@ entry: ret i8* %z } +define i8* @strh16_m254(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0, #-254]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @strh16_m256(i8* %y, i8* %x) { +; CHECK-LABEL: strh16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vstrh.16 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + define i8* @strb32_4(i8* %y, i8* %x) { ; CHECK-LABEL: strb32_4: @@ -1225,6 +1548,37 @@ entry: ret i8* %z } +define i8* @strb32_m127(i8* %y, i8* %x) { +; CHECK-LABEL: strb32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vstrb.32 q0, [r0, #-127]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <4 x i8>* + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + store <4 x i8> %1, <4 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @strb32_m128(i8* %y, i8* %x) { +; CHECK-LABEL: strb32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vstrb.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <4 x i8>* + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + store <4 x i8> %1, <4 x i8>* %2, align 1 + ret i8* %z +} + define i8* @strb16_4(i8* %y, i8* %x) { ; CHECK-LABEL: strb16_4: @@ -1287,6 +1641,37 @@ entry: ret i8* %z } +define i8* @strb16_m127(i8* %y, i8* %x) { +; CHECK-LABEL: strb16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vstrb.16 q0, [r0, #-127]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + store <8 x i8> %1, <8 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @strb16_m128(i8* %y, i8* %x) { +; CHECK-LABEL: strb16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vstrb.16 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + store <8 x i8> %1, <8 x i8>* %2, align 1 + ret i8* %z +} + define i8* @strb8_4(i8* %y, i8* %x) { ; CHECK-LABEL: strb8_4: @@ -1349,6 +1734,38 @@ entry: ret i8* %z } +define i8* @strb8_m127(i8* %y, i8* %x) { +; CHECK-LABEL: strb8_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0, #-127]! +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <16 x i8>* + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @strb8_m128(i8* %y, i8* %x) { +; CHECK-LABEL: strb8_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vstrb.8 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <16 x i8>* + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + + define i8* @strf32_4(i8* %y, i8* %x) { ; CHECK-LE-LABEL: strf32_4: ; CHECK-LE: @ %bb.0: @ %entry