From: David Green Date: Sun, 12 Sep 2021 11:13:29 +0000 (+0100) Subject: [AArch64] Regenerate some test checks. NFC X-Git-Tag: upstream/15.0.7~31662 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d338e535ec5f1de8b1b6cf7ea74514dfe1ecd0ce;p=platform%2Fupstream%2Fllvm.git [AArch64] Regenerate some test checks. NFC This regenerates some of the tests that had very-close-to-updated check line already, in order to make them more maintainable. --- diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll index 018a114..477255b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll @@ -1,10 +1,12 @@ -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-redzone | FileCheck %s -; RUN: llc < %s -mtriple=arm64_32-apple-ios -aarch64-redzone | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm64-apple-ios -aarch64-redzone | FileCheck %s --check-prefixes=CHECK,CHECK64 +; RUN: llc < %s -mtriple=arm64_32-apple-ios -aarch64-redzone | FileCheck %s --check-prefixes=CHECK,CHECK32 define i64* @store64(i64* %ptr, i64 %index, i64 %spacing) { ; CHECK-LABEL: store64: -; CHECK: str x{{[0-9+]}}, [x{{[0-9+]}}], #8 -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: str x2, [x0], #8 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i64, i64* %ptr, i64 1 store i64 %spacing, i64* %ptr, align 4 ret i64* %incdec.ptr @@ -12,9 +14,11 @@ define i64* @store64(i64* %ptr, i64 %index, i64 %spacing) { define i64* @store64idxpos256(i64* %ptr, i64 %index, i64 %spacing) { ; CHECK-LABEL: store64idxpos256: -; CHECK: add x{{[0-9+]}}, x{{[0-9+]}}, #256 -; CHECK: str x{{[0-9+]}}, [x{{[0-9+]}}] -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: add x8, x0, #256 +; CHECK-NEXT: str x2, [x0] +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i64, i64* %ptr, i64 32 store i64 %spacing, i64* %ptr, align 4 ret i64* %incdec.ptr @@ -22,8 +26,9 @@ define i64* @store64idxpos256(i64* %ptr, i64 %index, i64 %spacing) { define i64* @store64idxneg256(i64* %ptr, i64 %index, i64 %spacing) { ; CHECK-LABEL: store64idxneg256: -; CHECK: str x{{[0-9+]}}, [x{{[0-9+]}}], #-256 -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: str x2, [x0], #-256 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i64, i64* %ptr, i64 -32 store i64 %spacing, i64* %ptr, align 4 ret i64* %incdec.ptr @@ -31,8 +36,9 @@ define i64* @store64idxneg256(i64* %ptr, i64 %index, i64 %spacing) { define i32* @store32(i32* %ptr, i32 %index, i32 %spacing) { ; CHECK-LABEL: store32: -; CHECK: str w{{[0-9+]}}, [x{{[0-9+]}}], #4 -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: str w2, [x0], #4 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 1 store i32 %spacing, i32* %ptr, align 4 ret i32* %incdec.ptr @@ -40,9 +46,11 @@ define i32* @store32(i32* %ptr, i32 %index, i32 %spacing) { define i32* @store32idxpos256(i32* %ptr, i32 %index, i32 %spacing) { ; CHECK-LABEL: store32idxpos256: -; CHECK: add x{{[0-9+]}}, x{{[0-9+]}}, #256 -; CHECK: str w{{[0-9+]}}, [x{{[0-9+]}}] -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: add x8, x0, #256 +; CHECK-NEXT: str w2, [x0] +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 64 store i32 %spacing, i32* %ptr, align 4 ret i32* %incdec.ptr @@ -50,8 +58,9 @@ define i32* @store32idxpos256(i32* %ptr, i32 %index, i32 %spacing) { define i32* @store32idxneg256(i32* %ptr, i32 %index, i32 %spacing) { ; CHECK-LABEL: store32idxneg256: -; CHECK: str w{{[0-9+]}}, [x{{[0-9+]}}], #-256 -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: str w2, [x0], #-256 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 -64 store i32 %spacing, i32* %ptr, align 4 ret i32* %incdec.ptr @@ -59,8 +68,9 @@ define i32* @store32idxneg256(i32* %ptr, i32 %index, i32 %spacing) { define i16* @store16(i16* %ptr, i16 %index, i16 %spacing) { ; CHECK-LABEL: store16: -; CHECK: strh w{{[0-9+]}}, [x{{[0-9+]}}], #2 -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: strh w2, [x0], #2 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 1 store i16 %spacing, i16* %ptr, align 4 ret i16* %incdec.ptr @@ -68,9 +78,11 @@ define i16* @store16(i16* %ptr, i16 %index, i16 %spacing) { define i16* @store16idxpos256(i16* %ptr, i16 %index, i16 %spacing) { ; CHECK-LABEL: store16idxpos256: -; CHECK: add x{{[0-9+]}}, x{{[0-9+]}}, #256 -; CHECK: strh w{{[0-9+]}}, [x{{[0-9+]}}] -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: add x8, x0, #256 +; CHECK-NEXT: strh w2, [x0] +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 128 store i16 %spacing, i16* %ptr, align 4 ret i16* %incdec.ptr @@ -78,8 +90,9 @@ define i16* @store16idxpos256(i16* %ptr, i16 %index, i16 %spacing) { define i16* @store16idxneg256(i16* %ptr, i16 %index, i16 %spacing) { ; CHECK-LABEL: store16idxneg256: -; CHECK: strh w{{[0-9+]}}, [x{{[0-9+]}}], #-256 -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: strh w2, [x0], #-256 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 -128 store i16 %spacing, i16* %ptr, align 4 ret i16* %incdec.ptr @@ -87,8 +100,9 @@ define i16* @store16idxneg256(i16* %ptr, i16 %index, i16 %spacing) { define i8* @store8(i8* %ptr, i8 %index, i8 %spacing) { ; CHECK-LABEL: store8: -; CHECK: strb w{{[0-9+]}}, [x{{[0-9+]}}], #1 -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: strb w2, [x0], #1 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 1 store i8 %spacing, i8* %ptr, align 4 ret i8* %incdec.ptr @@ -96,9 +110,11 @@ define i8* @store8(i8* %ptr, i8 %index, i8 %spacing) { define i8* @store8idxpos256(i8* %ptr, i8 %index, i8 %spacing) { ; CHECK-LABEL: store8idxpos256: -; CHECK: add x{{[0-9+]}}, x{{[0-9+]}}, #256 -; CHECK: strb w{{[0-9+]}}, [x{{[0-9+]}}] -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: add x8, x0, #256 +; CHECK-NEXT: strb w2, [x0] +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 256 store i8 %spacing, i8* %ptr, align 4 ret i8* %incdec.ptr @@ -106,8 +122,9 @@ define i8* @store8idxpos256(i8* %ptr, i8 %index, i8 %spacing) { define i8* @store8idxneg256(i8* %ptr, i8 %index, i8 %spacing) { ; CHECK-LABEL: store8idxneg256: -; CHECK: strb w{{[0-9+]}}, [x{{[0-9+]}}], #-256 -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: strb w2, [x0], #-256 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 -256 store i8 %spacing, i8* %ptr, align 4 ret i8* %incdec.ptr @@ -115,8 +132,9 @@ define i8* @store8idxneg256(i8* %ptr, i8 %index, i8 %spacing) { define i32* @truncst64to32(i32* %ptr, i32 %index, i64 %spacing) { ; CHECK-LABEL: truncst64to32: -; CHECK: str w{{[0-9+]}}, [x{{[0-9+]}}], #4 -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: str w2, [x0], #4 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 1 %trunc = trunc i64 %spacing to i32 store i32 %trunc, i32* %ptr, align 4 @@ -125,8 +143,9 @@ define i32* @truncst64to32(i32* %ptr, i32 %index, i64 %spacing) { define i16* @truncst64to16(i16* %ptr, i16 %index, i64 %spacing) { ; CHECK-LABEL: truncst64to16: -; CHECK: strh w{{[0-9+]}}, [x{{[0-9+]}}], #2 -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: strh w2, [x0], #2 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 1 %trunc = trunc i64 %spacing to i16 store i16 %trunc, i16* %ptr, align 4 @@ -135,8 +154,9 @@ define i16* @truncst64to16(i16* %ptr, i16 %index, i64 %spacing) { define i8* @truncst64to8(i8* %ptr, i8 %index, i64 %spacing) { ; CHECK-LABEL: truncst64to8: -; CHECK: strb w{{[0-9+]}}, [x{{[0-9+]}}], #1 -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: strb w2, [x0], #1 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 1 %trunc = trunc i64 %spacing to i8 store i8 %trunc, i8* %ptr, align 4 @@ -146,8 +166,9 @@ define i8* @truncst64to8(i8* %ptr, i8 %index, i64 %spacing) { define half* @storef16(half* %ptr, half %index, half %spacing) nounwind { ; CHECK-LABEL: storef16: -; CHECK: str h{{[0-9+]}}, [x{{[0-9+]}}], #2 -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: str h1, [x0], #2 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds half, half* %ptr, i64 1 store half %spacing, half* %ptr, align 2 ret half* %incdec.ptr @@ -155,8 +176,9 @@ define half* @storef16(half* %ptr, half %index, half %spacing) nounwind { define float* @storef32(float* %ptr, float %index, float %spacing) { ; CHECK-LABEL: storef32: -; CHECK: str s{{[0-9+]}}, [x{{[0-9+]}}], #4 -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: str s1, [x0], #4 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds float, float* %ptr, i64 1 store float %spacing, float* %ptr, align 4 ret float* %incdec.ptr @@ -164,8 +186,9 @@ define float* @storef32(float* %ptr, float %index, float %spacing) { define double* @storef64(double* %ptr, double %index, double %spacing) { ; CHECK-LABEL: storef64: -; CHECK: str d{{[0-9+]}}, [x{{[0-9+]}}], #8 -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: str d1, [x0], #8 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds double, double* %ptr, i64 1 store double %spacing, double* %ptr, align 4 ret double* %incdec.ptr @@ -174,8 +197,9 @@ define double* @storef64(double* %ptr, double %index, double %spacing) { define double* @pref64(double* %ptr, double %spacing) { ; CHECK-LABEL: pref64: -; CHECK: str d0, [x0, #32]! -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: str d0, [x0, #32]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds double, double* %ptr, i64 4 store double %spacing, double* %incdec.ptr, align 4 ret double *%incdec.ptr @@ -183,8 +207,9 @@ define double* @pref64(double* %ptr, double %spacing) { define float* @pref32(float* %ptr, float %spacing) { ; CHECK-LABEL: pref32: -; CHECK: str s0, [x0, #12]! -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: str s0, [x0, #12]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds float, float* %ptr, i64 3 store float %spacing, float* %incdec.ptr, align 4 ret float *%incdec.ptr @@ -192,8 +217,9 @@ define float* @pref32(float* %ptr, float %spacing) { define half* @pref16(half* %ptr, half %spacing) nounwind { ; CHECK-LABEL: pref16: -; CHECK: str h0, [x0, #6]! -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: str h0, [x0, #6]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds half, half* %ptr, i64 3 store half %spacing, half* %incdec.ptr, align 2 ret half *%incdec.ptr @@ -201,8 +227,9 @@ define half* @pref16(half* %ptr, half %spacing) nounwind { define i64* @pre64(i64* %ptr, i64 %spacing) { ; CHECK-LABEL: pre64: -; CHECK: str x1, [x0, #16]! -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: str x1, [x0, #16]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i64, i64* %ptr, i64 2 store i64 %spacing, i64* %incdec.ptr, align 4 ret i64 *%incdec.ptr @@ -210,10 +237,11 @@ define i64* @pre64(i64* %ptr, i64 %spacing) { define i64* @pre64idxpos256(i64* %ptr, i64 %spacing) { ; CHECK-LABEL: pre64idxpos256: -; CHECK: add x8, x0, #256 -; CHECK-NEXT: str x1, [x0, #256] -; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: add x8, x0, #256 +; CHECK-NEXT: str x1, [x0, #256] +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i64, i64* %ptr, i64 32 store i64 %spacing, i64* %incdec.ptr, align 4 ret i64 *%incdec.ptr @@ -221,8 +249,9 @@ define i64* @pre64idxpos256(i64* %ptr, i64 %spacing) { define i64* @pre64idxneg256(i64* %ptr, i64 %spacing) { ; CHECK-LABEL: pre64idxneg256: -; CHECK: str x1, [x0, #-256]! -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: str x1, [x0, #-256]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i64, i64* %ptr, i64 -32 store i64 %spacing, i64* %incdec.ptr, align 4 ret i64 *%incdec.ptr @@ -230,8 +259,9 @@ define i64* @pre64idxneg256(i64* %ptr, i64 %spacing) { define i32* @pre32(i32* %ptr, i32 %spacing) { ; CHECK-LABEL: pre32: -; CHECK: str w1, [x0, #8]! -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: str w1, [x0, #8]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 2 store i32 %spacing, i32* %incdec.ptr, align 4 ret i32 *%incdec.ptr @@ -239,10 +269,11 @@ define i32* @pre32(i32* %ptr, i32 %spacing) { define i32* @pre32idxpos256(i32* %ptr, i32 %spacing) { ; CHECK-LABEL: pre32idxpos256: -; CHECK: add x8, x0, #256 -; CHECK-NEXT: str w1, [x0, #256] -; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: add x8, x0, #256 +; CHECK-NEXT: str w1, [x0, #256] +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 64 store i32 %spacing, i32* %incdec.ptr, align 4 ret i32 *%incdec.ptr @@ -250,8 +281,9 @@ define i32* @pre32idxpos256(i32* %ptr, i32 %spacing) { define i32* @pre32idxneg256(i32* %ptr, i32 %spacing) { ; CHECK-LABEL: pre32idxneg256: -; CHECK: str w1, [x0, #-256]! -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: str w1, [x0, #-256]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 -64 store i32 %spacing, i32* %incdec.ptr, align 4 ret i32 *%incdec.ptr @@ -259,8 +291,9 @@ define i32* @pre32idxneg256(i32* %ptr, i32 %spacing) { define i16* @pre16(i16* %ptr, i16 %spacing) { ; CHECK-LABEL: pre16: -; CHECK: strh w1, [x0, #4]! -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: strh w1, [x0, #4]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 2 store i16 %spacing, i16* %incdec.ptr, align 4 ret i16 *%incdec.ptr @@ -268,10 +301,11 @@ define i16* @pre16(i16* %ptr, i16 %spacing) { define i16* @pre16idxpos256(i16* %ptr, i16 %spacing) { ; CHECK-LABEL: pre16idxpos256: -; CHECK: add x8, x0, #256 -; CHECK-NEXT: strh w1, [x0, #256] -; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: add x8, x0, #256 +; CHECK-NEXT: strh w1, [x0, #256] +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 128 store i16 %spacing, i16* %incdec.ptr, align 4 ret i16 *%incdec.ptr @@ -279,8 +313,9 @@ define i16* @pre16idxpos256(i16* %ptr, i16 %spacing) { define i16* @pre16idxneg256(i16* %ptr, i16 %spacing) { ; CHECK-LABEL: pre16idxneg256: -; CHECK: strh w1, [x0, #-256]! -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: strh w1, [x0, #-256]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 -128 store i16 %spacing, i16* %incdec.ptr, align 4 ret i16 *%incdec.ptr @@ -288,8 +323,9 @@ define i16* @pre16idxneg256(i16* %ptr, i16 %spacing) { define i8* @pre8(i8* %ptr, i8 %spacing) { ; CHECK-LABEL: pre8: -; CHECK: strb w1, [x0, #2]! -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: strb w1, [x0, #2]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 2 store i8 %spacing, i8* %incdec.ptr, align 4 ret i8 *%incdec.ptr @@ -297,10 +333,11 @@ define i8* @pre8(i8* %ptr, i8 %spacing) { define i8* @pre8idxpos256(i8* %ptr, i8 %spacing) { ; CHECK-LABEL: pre8idxpos256: -; CHECK: add x8, x0, #256 -; CHECK-NEXT: strb w1, [x0, #256] -; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: add x8, x0, #256 +; CHECK-NEXT: strb w1, [x0, #256] +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 256 store i8 %spacing, i8* %incdec.ptr, align 4 ret i8 *%incdec.ptr @@ -308,8 +345,9 @@ define i8* @pre8idxpos256(i8* %ptr, i8 %spacing) { define i8* @pre8idxneg256(i8* %ptr, i8 %spacing) { ; CHECK-LABEL: pre8idxneg256: -; CHECK: strb w1, [x0, #-256]! -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: strb w1, [x0, #-256]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 -256 store i8 %spacing, i8* %incdec.ptr, align 4 ret i8 *%incdec.ptr @@ -317,8 +355,9 @@ define i8* @pre8idxneg256(i8* %ptr, i8 %spacing) { define i32* @pretrunc64to32(i32* %ptr, i64 %spacing) { ; CHECK-LABEL: pretrunc64to32: -; CHECK: str w1, [x0, #8]! -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: str w1, [x0, #8]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i32, i32* %ptr, i64 2 %trunc = trunc i64 %spacing to i32 store i32 %trunc, i32* %incdec.ptr, align 4 @@ -327,8 +366,9 @@ define i32* @pretrunc64to32(i32* %ptr, i64 %spacing) { define i16* @pretrunc64to16(i16* %ptr, i64 %spacing) { ; CHECK-LABEL: pretrunc64to16: -; CHECK: strh w1, [x0, #4]! -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: strh w1, [x0, #4]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i16, i16* %ptr, i64 2 %trunc = trunc i64 %spacing to i16 store i16 %trunc, i16* %incdec.ptr, align 4 @@ -337,8 +377,9 @@ define i16* @pretrunc64to16(i16* %ptr, i64 %spacing) { define i8* @pretrunc64to8(i8* %ptr, i64 %spacing) { ; CHECK-LABEL: pretrunc64to8: -; CHECK: strb w1, [x0, #2]! -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: strb w1, [x0, #2]! +; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i8, i8* %ptr, i64 2 %trunc = trunc i64 %spacing to i8 store i8 %trunc, i8* %incdec.ptr, align 4 @@ -350,9 +391,10 @@ define i8* @pretrunc64to8(i8* %ptr, i64 %spacing) { ;----- define double* @preidxf64(double* %src, double* %out) { ; CHECK-LABEL: preidxf64: -; CHECK: ldr d0, [x0, #8]! -; CHECK: str d0, [x1] -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0, #8]! +; CHECK-NEXT: str d0, [x1] +; CHECK-NEXT: ret %ptr = getelementptr inbounds double, double* %src, i64 1 %tmp = load double, double* %ptr, align 4 store double %tmp, double* %out, align 4 @@ -361,9 +403,10 @@ define double* @preidxf64(double* %src, double* %out) { define float* @preidxf32(float* %src, float* %out) { ; CHECK-LABEL: preidxf32: -; CHECK: ldr s0, [x0, #4]! -; CHECK: str s0, [x1] -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr s0, [x0, #4]! +; CHECK-NEXT: str s0, [x1] +; CHECK-NEXT: ret %ptr = getelementptr inbounds float, float* %src, i64 1 %tmp = load float, float* %ptr, align 4 store float %tmp, float* %out, align 4 @@ -372,9 +415,10 @@ define float* @preidxf32(float* %src, float* %out) { define half* @preidxf16(half* %src, half* %out) { ; CHECK-LABEL: preidxf16: -; CHECK: ldr h0, [x0, #2]! -; CHECK: str h0, [x1] -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr h0, [x0, #2]! +; CHECK-NEXT: str h0, [x1] +; CHECK-NEXT: ret %ptr = getelementptr inbounds half, half* %src, i64 1 %tmp = load half, half* %ptr, align 2 store half %tmp, half* %out, align 2 @@ -383,9 +427,10 @@ define half* @preidxf16(half* %src, half* %out) { define i64* @preidx64(i64* %src, i64* %out) { ; CHECK-LABEL: preidx64: -; CHECK: ldr x[[REG:[0-9]+]], [x0, #8]! -; CHECK: str x[[REG]], [x1] -; CHECK: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr x8, [x0, #8]! +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret %ptr = getelementptr inbounds i64, i64* %src, i64 1 %tmp = load i64, i64* %ptr, align 4 store i64 %tmp, i64* %out, align 4 @@ -393,9 +438,11 @@ define i64* @preidx64(i64* %src, i64* %out) { } define i32* @preidx32(i32* %src, i32* %out) { -; CHECK: ldr w[[REG:[0-9]+]], [x0, #4]! -; CHECK: str w[[REG]], [x1] -; CHECK: ret +; CHECK-LABEL: preidx32: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr w8, [x0, #4]! +; CHECK-NEXT: str w8, [x1] +; CHECK-NEXT: ret %ptr = getelementptr inbounds i32, i32* %src, i64 1 %tmp = load i32, i32* %ptr, align 4 store i32 %tmp, i32* %out, align 4 @@ -403,9 +450,11 @@ define i32* @preidx32(i32* %src, i32* %out) { } define i16* @preidx16zext32(i16* %src, i32* %out) { -; CHECK: ldrh w[[REG:[0-9]+]], [x0, #2]! -; CHECK: str w[[REG]], [x1] -; CHECK: ret +; CHECK-LABEL: preidx16zext32: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldrh w8, [x0, #2]! +; CHECK-NEXT: str w8, [x1] +; CHECK-NEXT: ret %ptr = getelementptr inbounds i16, i16* %src, i64 1 %tmp = load i16, i16* %ptr, align 4 %ext = zext i16 %tmp to i32 @@ -414,9 +463,11 @@ define i16* @preidx16zext32(i16* %src, i32* %out) { } define i16* @preidx16zext64(i16* %src, i64* %out) { -; CHECK: ldrh w[[REG:[0-9]+]], [x0, #2]! -; CHECK: str x[[REG]], [x1] -; CHECK: ret +; CHECK-LABEL: preidx16zext64: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldrh w8, [x0, #2]! +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret %ptr = getelementptr inbounds i16, i16* %src, i64 1 %tmp = load i16, i16* %ptr, align 4 %ext = zext i16 %tmp to i64 @@ -425,9 +476,11 @@ define i16* @preidx16zext64(i16* %src, i64* %out) { } define i8* @preidx8zext32(i8* %src, i32* %out) { -; CHECK: ldrb w[[REG:[0-9]+]], [x0, #1]! -; CHECK: str w[[REG]], [x1] -; CHECK: ret +; CHECK-LABEL: preidx8zext32: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldrb w8, [x0, #1]! +; CHECK-NEXT: str w8, [x1] +; CHECK-NEXT: ret %ptr = getelementptr inbounds i8, i8* %src, i64 1 %tmp = load i8, i8* %ptr, align 4 %ext = zext i8 %tmp to i32 @@ -436,9 +489,11 @@ define i8* @preidx8zext32(i8* %src, i32* %out) { } define i8* @preidx8zext64(i8* %src, i64* %out) { -; CHECK: ldrb w[[REG:[0-9]+]], [x0, #1]! -; CHECK: str x[[REG]], [x1] -; CHECK: ret +; CHECK-LABEL: preidx8zext64: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldrb w8, [x0, #1]! +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret %ptr = getelementptr inbounds i8, i8* %src, i64 1 %tmp = load i8, i8* %ptr, align 4 %ext = zext i8 %tmp to i64 @@ -447,9 +502,11 @@ define i8* @preidx8zext64(i8* %src, i64* %out) { } define i32* @preidx32sext64(i32* %src, i64* %out) { -; CHECK: ldrsw x[[REG:[0-9]+]], [x0, #4]! -; CHECK: str x[[REG]], [x1] -; CHECK: ret +; CHECK-LABEL: preidx32sext64: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldrsw x8, [x0, #4]! +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret %ptr = getelementptr inbounds i32, i32* %src, i64 1 %tmp = load i32, i32* %ptr, align 4 %ext = sext i32 %tmp to i64 @@ -458,9 +515,11 @@ define i32* @preidx32sext64(i32* %src, i64* %out) { } define i16* @preidx16sext32(i16* %src, i32* %out) { -; CHECK: ldrsh w[[REG:[0-9]+]], [x0, #2]! -; CHECK: str w[[REG]], [x1] -; CHECK: ret +; CHECK-LABEL: preidx16sext32: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldrsh w8, [x0, #2]! +; CHECK-NEXT: str w8, [x1] +; CHECK-NEXT: ret %ptr = getelementptr inbounds i16, i16* %src, i64 1 %tmp = load i16, i16* %ptr, align 4 %ext = sext i16 %tmp to i32 @@ -469,9 +528,11 @@ define i16* @preidx16sext32(i16* %src, i32* %out) { } define i16* @preidx16sext64(i16* %src, i64* %out) { -; CHECK: ldrsh x[[REG:[0-9]+]], [x0, #2]! -; CHECK: str x[[REG]], [x1] -; CHECK: ret +; CHECK-LABEL: preidx16sext64: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldrsh x8, [x0, #2]! +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret %ptr = getelementptr inbounds i16, i16* %src, i64 1 %tmp = load i16, i16* %ptr, align 4 %ext = sext i16 %tmp to i64 @@ -480,9 +541,11 @@ define i16* @preidx16sext64(i16* %src, i64* %out) { } define i8* @preidx8sext32(i8* %src, i32* %out) { -; CHECK: ldrsb w[[REG:[0-9]+]], [x0, #1]! -; CHECK: str w[[REG]], [x1] -; CHECK: ret +; CHECK-LABEL: preidx8sext32: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldrsb w8, [x0, #1]! +; CHECK-NEXT: str w8, [x1] +; CHECK-NEXT: ret %ptr = getelementptr inbounds i8, i8* %src, i64 1 %tmp = load i8, i8* %ptr, align 4 %ext = sext i8 %tmp to i32 @@ -491,9 +554,11 @@ define i8* @preidx8sext32(i8* %src, i32* %out) { } define i8* @preidx8sext64(i8* %src, i64* %out) { -; CHECK: ldrsb x[[REG:[0-9]+]], [x0, #1]! -; CHECK: str x[[REG]], [x1] -; CHECK: ret +; CHECK-LABEL: preidx8sext64: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldrsb x8, [x0, #1]! +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret %ptr = getelementptr inbounds i8, i8* %src, i64 1 %tmp = load i8, i8* %ptr, align 4 %ext = sext i8 %tmp to i64 @@ -504,8 +569,19 @@ define i8* @preidx8sext64(i8* %src, i64* %out) { ; This test checks if illegal post-index is generated define i64* @postidx_clobber(i64* %addr) nounwind noinline ssp { -; CHECK-LABEL: postidx_clobber: -; CHECK-NOT: str x0, [x0], #8 +; CHECK64-LABEL: postidx_clobber: +; CHECK64: ; %bb.0: +; CHECK64-NEXT: mov x8, x0 +; CHECK64-NEXT: str x0, [x8], #8 +; CHECK64-NEXT: mov x0, x8 +; CHECK64-NEXT: ret +; +; CHECK32-LABEL: postidx_clobber: +; CHECK32: ; %bb.0: +; CHECK32-NEXT: add w8, w0, #8 +; CHECK32-NEXT: str w0, [x0] +; CHECK32-NEXT: mov x0, x8 +; CHECK32-NEXT: ret ; ret %paddr = bitcast i64* %addr to i64** store i64* %addr, i64** %paddr diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index b3381b6..d4aaa9c 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -1,10 +1,15 @@ -; RUN: llc -mtriple=arm64-apple-ios7.0 -disable-post-ra -o - %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s @ptr = global i8* null define <8 x i8> @test_v8i8_pre_load(<8 x i8>* %addr) { ; CHECK-LABEL: test_v8i8_pre_load: -; CHECK: ldr d0, [x0, #40]! +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0, #40]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5 %val = load <8 x i8>, <8 x i8>* %newaddr, align 8 store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**) @@ -13,7 +18,11 @@ define <8 x i8> @test_v8i8_pre_load(<8 x i8>* %addr) { define <8 x i8> @test_v8i8_post_load(<8 x i8>* %addr) { ; CHECK-LABEL: test_v8i8_post_load: -; CHECK: ldr d0, [x0], #40 +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0], #40 +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5 %val = load <8 x i8>, <8 x i8>* %addr, align 8 store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**) @@ -22,7 +31,11 @@ define <8 x i8> @test_v8i8_post_load(<8 x i8>* %addr) { define void @test_v8i8_pre_store(<8 x i8> %in, <8 x i8>* %addr) { ; CHECK-LABEL: test_v8i8_pre_store: -; CHECK: str d0, [x0, #40]! +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str d0, [x0, #40]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5 store <8 x i8> %in, <8 x i8>* %newaddr, align 8 store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**) @@ -31,7 +44,11 @@ define void @test_v8i8_pre_store(<8 x i8> %in, <8 x i8>* %addr) { define void @test_v8i8_post_store(<8 x i8> %in, <8 x i8>* %addr) { ; CHECK-LABEL: test_v8i8_post_store: -; CHECK: str d0, [x0], #40 +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str d0, [x0], #40 +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5 store <8 x i8> %in, <8 x i8>* %addr, align 8 store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**) @@ -40,7 +57,11 @@ define void @test_v8i8_post_store(<8 x i8> %in, <8 x i8>* %addr) { define <4 x i16> @test_v4i16_pre_load(<4 x i16>* %addr) { ; CHECK-LABEL: test_v4i16_pre_load: -; CHECK: ldr d0, [x0, #40]! +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0, #40]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5 %val = load <4 x i16>, <4 x i16>* %newaddr, align 8 store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**) @@ -49,7 +70,11 @@ define <4 x i16> @test_v4i16_pre_load(<4 x i16>* %addr) { define <4 x i16> @test_v4i16_post_load(<4 x i16>* %addr) { ; CHECK-LABEL: test_v4i16_post_load: -; CHECK: ldr d0, [x0], #40 +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0], #40 +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5 %val = load <4 x i16>, <4 x i16>* %addr, align 8 store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**) @@ -58,7 +83,11 @@ define <4 x i16> @test_v4i16_post_load(<4 x i16>* %addr) { define void @test_v4i16_pre_store(<4 x i16> %in, <4 x i16>* %addr) { ; CHECK-LABEL: test_v4i16_pre_store: -; CHECK: str d0, [x0, #40]! +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str d0, [x0, #40]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5 store <4 x i16> %in, <4 x i16>* %newaddr, align 8 store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**) @@ -67,7 +96,11 @@ define void @test_v4i16_pre_store(<4 x i16> %in, <4 x i16>* %addr) { define void @test_v4i16_post_store(<4 x i16> %in, <4 x i16>* %addr) { ; CHECK-LABEL: test_v4i16_post_store: -; CHECK: str d0, [x0], #40 +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str d0, [x0], #40 +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5 store <4 x i16> %in, <4 x i16>* %addr, align 8 store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**) @@ -76,7 +109,11 @@ define void @test_v4i16_post_store(<4 x i16> %in, <4 x i16>* %addr) { define <2 x i32> @test_v2i32_pre_load(<2 x i32>* %addr) { ; CHECK-LABEL: test_v2i32_pre_load: -; CHECK: ldr d0, [x0, #40]! +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0, #40]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5 %val = load <2 x i32>, <2 x i32>* %newaddr, align 8 store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**) @@ -85,7 +122,11 @@ define <2 x i32> @test_v2i32_pre_load(<2 x i32>* %addr) { define <2 x i32> @test_v2i32_post_load(<2 x i32>* %addr) { ; CHECK-LABEL: test_v2i32_post_load: -; CHECK: ldr d0, [x0], #40 +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0], #40 +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5 %val = load <2 x i32>, <2 x i32>* %addr, align 8 store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**) @@ -94,7 +135,11 @@ define <2 x i32> @test_v2i32_post_load(<2 x i32>* %addr) { define void @test_v2i32_pre_store(<2 x i32> %in, <2 x i32>* %addr) { ; CHECK-LABEL: test_v2i32_pre_store: -; CHECK: str d0, [x0, #40]! +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str d0, [x0, #40]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5 store <2 x i32> %in, <2 x i32>* %newaddr, align 8 store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**) @@ -103,7 +148,11 @@ define void @test_v2i32_pre_store(<2 x i32> %in, <2 x i32>* %addr) { define void @test_v2i32_post_store(<2 x i32> %in, <2 x i32>* %addr) { ; CHECK-LABEL: test_v2i32_post_store: -; CHECK: str d0, [x0], #40 +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str d0, [x0], #40 +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5 store <2 x i32> %in, <2 x i32>* %addr, align 8 store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**) @@ -112,7 +161,11 @@ define void @test_v2i32_post_store(<2 x i32> %in, <2 x i32>* %addr) { define <2 x float> @test_v2f32_pre_load(<2 x float>* %addr) { ; CHECK-LABEL: test_v2f32_pre_load: -; CHECK: ldr d0, [x0, #40]! +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0, #40]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5 %val = load <2 x float>, <2 x float>* %newaddr, align 8 store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**) @@ -121,7 +174,11 @@ define <2 x float> @test_v2f32_pre_load(<2 x float>* %addr) { define <2 x float> @test_v2f32_post_load(<2 x float>* %addr) { ; CHECK-LABEL: test_v2f32_post_load: -; CHECK: ldr d0, [x0], #40 +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0], #40 +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5 %val = load <2 x float>, <2 x float>* %addr, align 8 store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**) @@ -130,7 +187,11 @@ define <2 x float> @test_v2f32_post_load(<2 x float>* %addr) { define void @test_v2f32_pre_store(<2 x float> %in, <2 x float>* %addr) { ; CHECK-LABEL: test_v2f32_pre_store: -; CHECK: str d0, [x0, #40]! +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str d0, [x0, #40]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5 store <2 x float> %in, <2 x float>* %newaddr, align 8 store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**) @@ -139,7 +200,11 @@ define void @test_v2f32_pre_store(<2 x float> %in, <2 x float>* %addr) { define void @test_v2f32_post_store(<2 x float> %in, <2 x float>* %addr) { ; CHECK-LABEL: test_v2f32_post_store: -; CHECK: str d0, [x0], #40 +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str d0, [x0], #40 +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5 store <2 x float> %in, <2 x float>* %addr, align 8 store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**) @@ -148,7 +213,11 @@ define void @test_v2f32_post_store(<2 x float> %in, <2 x float>* %addr) { define <1 x i64> @test_v1i64_pre_load(<1 x i64>* %addr) { ; CHECK-LABEL: test_v1i64_pre_load: -; CHECK: ldr d0, [x0, #40]! +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0, #40]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5 %val = load <1 x i64>, <1 x i64>* %newaddr, align 8 store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**) @@ -157,7 +226,11 @@ define <1 x i64> @test_v1i64_pre_load(<1 x i64>* %addr) { define <1 x i64> @test_v1i64_post_load(<1 x i64>* %addr) { ; CHECK-LABEL: test_v1i64_post_load: -; CHECK: ldr d0, [x0], #40 +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0], #40 +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5 %val = load <1 x i64>, <1 x i64>* %addr, align 8 store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**) @@ -166,7 +239,11 @@ define <1 x i64> @test_v1i64_post_load(<1 x i64>* %addr) { define void @test_v1i64_pre_store(<1 x i64> %in, <1 x i64>* %addr) { ; CHECK-LABEL: test_v1i64_pre_store: -; CHECK: str d0, [x0, #40]! +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str d0, [x0, #40]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5 store <1 x i64> %in, <1 x i64>* %newaddr, align 8 store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**) @@ -175,7 +252,11 @@ define void @test_v1i64_pre_store(<1 x i64> %in, <1 x i64>* %addr) { define void @test_v1i64_post_store(<1 x i64> %in, <1 x i64>* %addr) { ; CHECK-LABEL: test_v1i64_post_store: -; CHECK: str d0, [x0], #40 +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str d0, [x0], #40 +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5 store <1 x i64> %in, <1 x i64>* %addr, align 8 store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**) @@ -184,7 +265,11 @@ define void @test_v1i64_post_store(<1 x i64> %in, <1 x i64>* %addr) { define <16 x i8> @test_v16i8_pre_load(<16 x i8>* %addr) { ; CHECK-LABEL: test_v16i8_pre_load: -; CHECK: ldr q0, [x0, #80]! +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0, #80]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5 %val = load <16 x i8>, <16 x i8>* %newaddr, align 8 store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**) @@ -193,7 +278,11 @@ define <16 x i8> @test_v16i8_pre_load(<16 x i8>* %addr) { define <16 x i8> @test_v16i8_post_load(<16 x i8>* %addr) { ; CHECK-LABEL: test_v16i8_post_load: -; CHECK: ldr q0, [x0], #80 +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0], #80 +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5 %val = load <16 x i8>, <16 x i8>* %addr, align 8 store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**) @@ -202,7 +291,11 @@ define <16 x i8> @test_v16i8_post_load(<16 x i8>* %addr) { define void @test_v16i8_pre_store(<16 x i8> %in, <16 x i8>* %addr) { ; CHECK-LABEL: test_v16i8_pre_store: -; CHECK: str q0, [x0, #80]! +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str q0, [x0, #80]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5 store <16 x i8> %in, <16 x i8>* %newaddr, align 8 store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**) @@ -211,7 +304,11 @@ define void @test_v16i8_pre_store(<16 x i8> %in, <16 x i8>* %addr) { define void @test_v16i8_post_store(<16 x i8> %in, <16 x i8>* %addr) { ; CHECK-LABEL: test_v16i8_post_store: -; CHECK: str q0, [x0], #80 +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str q0, [x0], #80 +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5 store <16 x i8> %in, <16 x i8>* %addr, align 8 store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**) @@ -220,7 +317,11 @@ define void @test_v16i8_post_store(<16 x i8> %in, <16 x i8>* %addr) { define <8 x i16> @test_v8i16_pre_load(<8 x i16>* %addr) { ; CHECK-LABEL: test_v8i16_pre_load: -; CHECK: ldr q0, [x0, #80]! +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0, #80]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5 %val = load <8 x i16>, <8 x i16>* %newaddr, align 8 store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**) @@ -229,7 +330,11 @@ define <8 x i16> @test_v8i16_pre_load(<8 x i16>* %addr) { define <8 x i16> @test_v8i16_post_load(<8 x i16>* %addr) { ; CHECK-LABEL: test_v8i16_post_load: -; CHECK: ldr q0, [x0], #80 +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0], #80 +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5 %val = load <8 x i16>, <8 x i16>* %addr, align 8 store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**) @@ -238,7 +343,11 @@ define <8 x i16> @test_v8i16_post_load(<8 x i16>* %addr) { define void @test_v8i16_pre_store(<8 x i16> %in, <8 x i16>* %addr) { ; CHECK-LABEL: test_v8i16_pre_store: -; CHECK: str q0, [x0, #80]! +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str q0, [x0, #80]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5 store <8 x i16> %in, <8 x i16>* %newaddr, align 8 store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**) @@ -247,7 +356,11 @@ define void @test_v8i16_pre_store(<8 x i16> %in, <8 x i16>* %addr) { define void @test_v8i16_post_store(<8 x i16> %in, <8 x i16>* %addr) { ; CHECK-LABEL: test_v8i16_post_store: -; CHECK: str q0, [x0], #80 +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str q0, [x0], #80 +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5 store <8 x i16> %in, <8 x i16>* %addr, align 8 store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**) @@ -256,7 +369,11 @@ define void @test_v8i16_post_store(<8 x i16> %in, <8 x i16>* %addr) { define <4 x i32> @test_v4i32_pre_load(<4 x i32>* %addr) { ; CHECK-LABEL: test_v4i32_pre_load: -; CHECK: ldr q0, [x0, #80]! +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0, #80]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5 %val = load <4 x i32>, <4 x i32>* %newaddr, align 8 store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**) @@ -265,7 +382,11 @@ define <4 x i32> @test_v4i32_pre_load(<4 x i32>* %addr) { define <4 x i32> @test_v4i32_post_load(<4 x i32>* %addr) { ; CHECK-LABEL: test_v4i32_post_load: -; CHECK: ldr q0, [x0], #80 +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0], #80 +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5 %val = load <4 x i32>, <4 x i32>* %addr, align 8 store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**) @@ -274,7 +395,11 @@ define <4 x i32> @test_v4i32_post_load(<4 x i32>* %addr) { define void @test_v4i32_pre_store(<4 x i32> %in, <4 x i32>* %addr) { ; CHECK-LABEL: test_v4i32_pre_store: -; CHECK: str q0, [x0, #80]! +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str q0, [x0, #80]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5 store <4 x i32> %in, <4 x i32>* %newaddr, align 8 store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**) @@ -283,7 +408,11 @@ define void @test_v4i32_pre_store(<4 x i32> %in, <4 x i32>* %addr) { define void @test_v4i32_post_store(<4 x i32> %in, <4 x i32>* %addr) { ; CHECK-LABEL: test_v4i32_post_store: -; CHECK: str q0, [x0], #80 +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str q0, [x0], #80 +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5 store <4 x i32> %in, <4 x i32>* %addr, align 8 store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**) @@ -293,7 +422,11 @@ define void @test_v4i32_post_store(<4 x i32> %in, <4 x i32>* %addr) { define <4 x float> @test_v4f32_pre_load(<4 x float>* %addr) { ; CHECK-LABEL: test_v4f32_pre_load: -; CHECK: ldr q0, [x0, #80]! +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0, #80]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5 %val = load <4 x float>, <4 x float>* %newaddr, align 8 store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**) @@ -302,7 +435,11 @@ define <4 x float> @test_v4f32_pre_load(<4 x float>* %addr) { define <4 x float> @test_v4f32_post_load(<4 x float>* %addr) { ; CHECK-LABEL: test_v4f32_post_load: -; CHECK: ldr q0, [x0], #80 +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0], #80 +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5 %val = load <4 x float>, <4 x float>* %addr, align 8 store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**) @@ -311,7 +448,11 @@ define <4 x float> @test_v4f32_post_load(<4 x float>* %addr) { define void @test_v4f32_pre_store(<4 x float> %in, <4 x float>* %addr) { ; CHECK-LABEL: test_v4f32_pre_store: -; CHECK: str q0, [x0, #80]! +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str q0, [x0, #80]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5 store <4 x float> %in, <4 x float>* %newaddr, align 8 store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**) @@ -320,7 +461,11 @@ define void @test_v4f32_pre_store(<4 x float> %in, <4 x float>* %addr) { define void @test_v4f32_post_store(<4 x float> %in, <4 x float>* %addr) { ; CHECK-LABEL: test_v4f32_post_store: -; CHECK: str q0, [x0], #80 +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str q0, [x0], #80 +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5 store <4 x float> %in, <4 x float>* %addr, align 8 store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**) @@ -330,7 +475,11 @@ define void @test_v4f32_post_store(<4 x float> %in, <4 x float>* %addr) { define <2 x i64> @test_v2i64_pre_load(<2 x i64>* %addr) { ; CHECK-LABEL: test_v2i64_pre_load: -; CHECK: ldr q0, [x0, #80]! +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0, #80]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5 %val = load <2 x i64>, <2 x i64>* %newaddr, align 8 store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**) @@ -339,7 +488,11 @@ define <2 x i64> @test_v2i64_pre_load(<2 x i64>* %addr) { define <2 x i64> @test_v2i64_post_load(<2 x i64>* %addr) { ; CHECK-LABEL: test_v2i64_post_load: -; CHECK: ldr q0, [x0], #80 +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0], #80 +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5 %val = load <2 x i64>, <2 x i64>* %addr, align 8 store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**) @@ -348,7 +501,11 @@ define <2 x i64> @test_v2i64_post_load(<2 x i64>* %addr) { define void @test_v2i64_pre_store(<2 x i64> %in, <2 x i64>* %addr) { ; CHECK-LABEL: test_v2i64_pre_store: -; CHECK: str q0, [x0, #80]! +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str q0, [x0, #80]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5 store <2 x i64> %in, <2 x i64>* %newaddr, align 8 store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**) @@ -357,7 +514,11 @@ define void @test_v2i64_pre_store(<2 x i64> %in, <2 x i64>* %addr) { define void @test_v2i64_post_store(<2 x i64> %in, <2 x i64>* %addr) { ; CHECK-LABEL: test_v2i64_post_store: -; CHECK: str q0, [x0], #80 +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str q0, [x0], #80 +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5 store <2 x i64> %in, <2 x i64>* %addr, align 8 store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**) @@ -367,7 +528,11 @@ define void @test_v2i64_post_store(<2 x i64> %in, <2 x i64>* %addr) { define <2 x double> @test_v2f64_pre_load(<2 x double>* %addr) { ; CHECK-LABEL: test_v2f64_pre_load: -; CHECK: ldr q0, [x0, #80]! +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0, #80]! +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5 %val = load <2 x double>, <2 x double>* %newaddr, align 8 store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**) @@ -376,7 +541,11 @@ define <2 x double> @test_v2f64_pre_load(<2 x double>* %addr) { define <2 x double> @test_v2f64_post_load(<2 x double>* %addr) { ; CHECK-LABEL: test_v2f64_post_load: -; CHECK: ldr q0, [x0], #80 +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0], #80 +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5 %val = load <2 x double>, <2 x double>* %addr, align 8 store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**) @@ -385,7 +554,11 @@ define <2 x double> @test_v2f64_post_load(<2 x double>* %addr) { define void @test_v2f64_pre_store(<2 x double> %in, <2 x double>* %addr) { ; CHECK-LABEL: test_v2f64_pre_store: -; CHECK: str q0, [x0, #80]! +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str q0, [x0, #80]! +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5 store <2 x double> %in, <2 x double>* %newaddr, align 8 store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**) @@ -394,7 +567,11 @@ define void @test_v2f64_pre_store(<2 x double> %in, <2 x double>* %addr) { define void @test_v2f64_post_store(<2 x double> %in, <2 x double>* %addr) { ; CHECK-LABEL: test_v2f64_post_store: -; CHECK: str q0, [x0], #80 +; CHECK: ; %bb.0: +; CHECK-NEXT: adrp x8, _ptr@PAGE +; CHECK-NEXT: str q0, [x0], #80 +; CHECK-NEXT: str x0, [x8, _ptr@PAGEOFF] +; CHECK-NEXT: ret %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5 store <2 x double> %in, <2 x double>* %addr, align 8 store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**) @@ -403,7 +580,9 @@ define void @test_v2f64_post_store(<2 x double> %in, <2 x double>* %addr) { define i8* @test_v16i8_post_imm_st1_lane(<16 x i8> %in, i8* %addr) { ; CHECK-LABEL: test_v16i8_post_imm_st1_lane: -; CHECK: st1.b { v0 }[3], [x0], #1 +; CHECK: ; %bb.0: +; CHECK-NEXT: st1.b { v0 }[3], [x0], #1 +; CHECK-NEXT: ret %elt = extractelement <16 x i8> %in, i32 3 store i8 %elt, i8* %addr @@ -413,8 +592,10 @@ define i8* @test_v16i8_post_imm_st1_lane(<16 x i8> %in, i8* %addr) { define i8* @test_v16i8_post_reg_st1_lane(<16 x i8> %in, i8* %addr) { ; CHECK-LABEL: test_v16i8_post_reg_st1_lane: -; CHECK: mov w[[OFFSET:[0-9]+]], #2 -; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]] +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: st1.b { v0 }[3], [x0], x8 +; CHECK-NEXT: ret %elt = extractelement <16 x i8> %in, i32 3 store i8 %elt, i8* %addr @@ -425,7 +606,9 @@ define i8* @test_v16i8_post_reg_st1_lane(<16 x i8> %in, i8* %addr) { define i16* @test_v8i16_post_imm_st1_lane(<8 x i16> %in, i16* %addr) { ; CHECK-LABEL: test_v8i16_post_imm_st1_lane: -; CHECK: st1.h { v0 }[3], [x0], #2 +; CHECK: ; %bb.0: +; CHECK-NEXT: st1.h { v0 }[3], [x0], #2 +; CHECK-NEXT: ret %elt = extractelement <8 x i16> %in, i32 3 store i16 %elt, i16* %addr @@ -435,8 +618,10 @@ define i16* @test_v8i16_post_imm_st1_lane(<8 x i16> %in, i16* %addr) { define i16* @test_v8i16_post_reg_st1_lane(<8 x i16> %in, i16* %addr) { ; CHECK-LABEL: test_v8i16_post_reg_st1_lane: -; CHECK: mov w[[OFFSET:[0-9]+]], #4 -; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]] +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: st1.h { v0 }[3], [x0], x8 +; CHECK-NEXT: ret %elt = extractelement <8 x i16> %in, i32 3 store i16 %elt, i16* %addr @@ -446,7 +631,9 @@ define i16* @test_v8i16_post_reg_st1_lane(<8 x i16> %in, i16* %addr) { define i32* @test_v4i32_post_imm_st1_lane(<4 x i32> %in, i32* %addr) { ; CHECK-LABEL: test_v4i32_post_imm_st1_lane: -; CHECK: st1.s { v0 }[3], [x0], #4 +; CHECK: ; %bb.0: +; CHECK-NEXT: st1.s { v0 }[3], [x0], #4 +; CHECK-NEXT: ret %elt = extractelement <4 x i32> %in, i32 3 store i32 %elt, i32* %addr @@ -456,8 +643,10 @@ define i32* @test_v4i32_post_imm_st1_lane(<4 x i32> %in, i32* %addr) { define i32* @test_v4i32_post_reg_st1_lane(<4 x i32> %in, i32* %addr) { ; CHECK-LABEL: test_v4i32_post_reg_st1_lane: -; CHECK: mov w[[OFFSET:[0-9]+]], #8 -; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]] +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w8, #8 +; CHECK-NEXT: st1.s { v0 }[3], [x0], x8 +; CHECK-NEXT: ret %elt = extractelement <4 x i32> %in, i32 3 store i32 %elt, i32* %addr @@ -467,7 +656,9 @@ define i32* @test_v4i32_post_reg_st1_lane(<4 x i32> %in, i32* %addr) { define float* @test_v4f32_post_imm_st1_lane(<4 x float> %in, float* %addr) { ; CHECK-LABEL: test_v4f32_post_imm_st1_lane: -; CHECK: st1.s { v0 }[3], [x0], #4 +; CHECK: ; %bb.0: +; CHECK-NEXT: st1.s { v0 }[3], [x0], #4 +; CHECK-NEXT: ret %elt = extractelement <4 x float> %in, i32 3 store float %elt, float* %addr @@ -477,8 +668,10 @@ define float* @test_v4f32_post_imm_st1_lane(<4 x float> %in, float* %addr) { define float* @test_v4f32_post_reg_st1_lane(<4 x float> %in, float* %addr) { ; CHECK-LABEL: test_v4f32_post_reg_st1_lane: -; CHECK: mov w[[OFFSET:[0-9]+]], #8 -; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]] +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w8, #8 +; CHECK-NEXT: st1.s { v0 }[3], [x0], x8 +; CHECK-NEXT: ret %elt = extractelement <4 x float> %in, i32 3 store float %elt, float* %addr @@ -488,7 +681,9 @@ define float* @test_v4f32_post_reg_st1_lane(<4 x float> %in, float* %addr) { define i64* @test_v2i64_post_imm_st1_lane(<2 x i64> %in, i64* %addr) { ; CHECK-LABEL: test_v2i64_post_imm_st1_lane: -; CHECK: st1.d { v0 }[1], [x0], #8 +; CHECK: ; %bb.0: +; CHECK-NEXT: st1.d { v0 }[1], [x0], #8 +; CHECK-NEXT: ret %elt = extractelement <2 x i64> %in, i64 1 store i64 %elt, i64* %addr @@ -498,8 +693,10 @@ define i64* @test_v2i64_post_imm_st1_lane(<2 x i64> %in, i64* %addr) { define i64* @test_v2i64_post_reg_st1_lane(<2 x i64> %in, i64* %addr) { ; CHECK-LABEL: test_v2i64_post_reg_st1_lane: -; CHECK: mov w[[OFFSET:[0-9]+]], #16 -; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]] +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w8, #16 +; CHECK-NEXT: st1.d { v0 }[1], [x0], x8 +; CHECK-NEXT: ret %elt = extractelement <2 x i64> %in, i64 1 store i64 %elt, i64* %addr @@ -509,7 +706,9 @@ define i64* @test_v2i64_post_reg_st1_lane(<2 x i64> %in, i64* %addr) { define double* @test_v2f64_post_imm_st1_lane(<2 x double> %in, double* %addr) { ; CHECK-LABEL: test_v2f64_post_imm_st1_lane: -; CHECK: st1.d { v0 }[1], [x0], #8 +; CHECK: ; %bb.0: +; CHECK-NEXT: st1.d { v0 }[1], [x0], #8 +; CHECK-NEXT: ret %elt = extractelement <2 x double> %in, i32 1 store double %elt, double* %addr @@ -519,8 +718,10 @@ define double* @test_v2f64_post_imm_st1_lane(<2 x double> %in, double* %addr) { define double* @test_v2f64_post_reg_st1_lane(<2 x double> %in, double* %addr) { ; CHECK-LABEL: test_v2f64_post_reg_st1_lane: -; CHECK: mov w[[OFFSET:[0-9]+]], #16 -; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]] +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w8, #16 +; CHECK-NEXT: st1.d { v0 }[1], [x0], x8 +; CHECK-NEXT: ret %elt = extractelement <2 x double> %in, i32 1 store double %elt, double* %addr @@ -530,7 +731,10 @@ define double* @test_v2f64_post_reg_st1_lane(<2 x double> %in, double* %addr) { define i8* @test_v8i8_post_imm_st1_lane(<8 x i8> %in, i8* %addr) { ; CHECK-LABEL: test_v8i8_post_imm_st1_lane: -; CHECK: st1.b { v0 }[3], [x0], #1 +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: st1.b { v0 }[3], [x0], #1 +; CHECK-NEXT: ret %elt = extractelement <8 x i8> %in, i32 3 store i8 %elt, i8* %addr @@ -540,8 +744,11 @@ define i8* @test_v8i8_post_imm_st1_lane(<8 x i8> %in, i8* %addr) { define i8* @test_v8i8_post_reg_st1_lane(<8 x i8> %in, i8* %addr) { ; CHECK-LABEL: test_v8i8_post_reg_st1_lane: -; CHECK: mov w[[OFFSET:[0-9]+]], #2 -; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]] +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: st1.b { v0 }[3], [x0], x8 +; CHECK-NEXT: ret %elt = extractelement <8 x i8> %in, i32 3 store i8 %elt, i8* %addr @@ -551,7 +758,10 @@ define i8* @test_v8i8_post_reg_st1_lane(<8 x i8> %in, i8* %addr) { define i16* @test_v4i16_post_imm_st1_lane(<4 x i16> %in, i16* %addr) { ; CHECK-LABEL: test_v4i16_post_imm_st1_lane: -; CHECK: st1.h { v0 }[3], [x0], #2 +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: st1.h { v0 }[3], [x0], #2 +; CHECK-NEXT: ret %elt = extractelement <4 x i16> %in, i32 3 store i16 %elt, i16* %addr @@ -561,8 +771,11 @@ define i16* @test_v4i16_post_imm_st1_lane(<4 x i16> %in, i16* %addr) { define i16* @test_v4i16_post_reg_st1_lane(<4 x i16> %in, i16* %addr) { ; CHECK-LABEL: test_v4i16_post_reg_st1_lane: -; CHECK: mov w[[OFFSET:[0-9]+]], #4 -; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]] +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: st1.h { v0 }[3], [x0], x8 +; CHECK-NEXT: ret %elt = extractelement <4 x i16> %in, i32 3 store i16 %elt, i16* %addr @@ -572,7 +785,10 @@ define i16* @test_v4i16_post_reg_st1_lane(<4 x i16> %in, i16* %addr) { define i32* @test_v2i32_post_imm_st1_lane(<2 x i32> %in, i32* %addr) { ; CHECK-LABEL: test_v2i32_post_imm_st1_lane: -; CHECK: st1.s { v0 }[1], [x0], #4 +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: st1.s { v0 }[1], [x0], #4 +; CHECK-NEXT: ret %elt = extractelement <2 x i32> %in, i32 1 store i32 %elt, i32* %addr @@ -582,8 +798,11 @@ define i32* @test_v2i32_post_imm_st1_lane(<2 x i32> %in, i32* %addr) { define i32* @test_v2i32_post_reg_st1_lane(<2 x i32> %in, i32* %addr) { ; CHECK-LABEL: test_v2i32_post_reg_st1_lane: -; CHECK: mov w[[OFFSET:[0-9]+]], #8 -; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]] +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w8, #8 +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: st1.s { v0 }[1], [x0], x8 +; CHECK-NEXT: ret %elt = extractelement <2 x i32> %in, i32 1 store i32 %elt, i32* %addr @@ -593,7 +812,10 @@ define i32* @test_v2i32_post_reg_st1_lane(<2 x i32> %in, i32* %addr) { define float* @test_v2f32_post_imm_st1_lane(<2 x float> %in, float* %addr) { ; CHECK-LABEL: test_v2f32_post_imm_st1_lane: -; CHECK: st1.s { v0 }[1], [x0], #4 +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: st1.s { v0 }[1], [x0], #4 +; CHECK-NEXT: ret %elt = extractelement <2 x float> %in, i32 1 store float %elt, float* %addr @@ -603,8 +825,11 @@ define float* @test_v2f32_post_imm_st1_lane(<2 x float> %in, float* %addr) { define float* @test_v2f32_post_reg_st1_lane(<2 x float> %in, float* %addr) { ; CHECK-LABEL: test_v2f32_post_reg_st1_lane: -; CHECK: mov w[[OFFSET:[0-9]+]], #8 -; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]] +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w8, #8 +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: st1.s { v0 }[1], [x0], x8 +; CHECK-NEXT: ret %elt = extractelement <2 x float> %in, i32 1 store float %elt, float* %addr @@ -613,8 +838,11 @@ define float* @test_v2f32_post_reg_st1_lane(<2 x float> %in, float* %addr) { } define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) { -;CHECK-LABEL: test_v16i8_post_imm_ld2: -;CHECK: ld2.16b { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v16i8_post_imm_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2.16b { v0, v1 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i32 32 store i8* %tmp, i8** %ptr @@ -622,8 +850,11 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) { } define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) { -;CHECK-LABEL: test_v16i8_post_reg_ld2: -;CHECK: ld2.16b { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2.16b { v0, v1 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -634,8 +865,11 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*) define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(i8* %A, i8** %ptr) { -;CHECK-LABEL: test_v8i8_post_imm_ld2: -;CHECK: ld2.8b { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v8i8_post_imm_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2.8b { v0, v1 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i32 16 store i8* %tmp, i8** %ptr @@ -643,8 +877,11 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(i8* %A, i8** %ptr) { } define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) { -;CHECK-LABEL: test_v8i8_post_reg_ld2: -;CHECK: ld2.8b { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2.8b { v0, v1 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -655,8 +892,11 @@ declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*) define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(i16* %A, i16** %ptr) { -;CHECK-LABEL: test_v8i16_post_imm_ld2: -;CHECK: ld2.8h { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v8i16_post_imm_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2.8h { v0, v1 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i32 16 store i16* %tmp, i16** %ptr @@ -664,8 +904,12 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(i16* %A, i16** %ptr) { } define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) { -;CHECK-LABEL: test_v8i16_post_reg_ld2: -;CHECK: ld2.8h { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld2.8h { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -676,8 +920,11 @@ declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*) define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(i16* %A, i16** %ptr) { -;CHECK-LABEL: test_v4i16_post_imm_ld2: -;CHECK: ld2.4h { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v4i16_post_imm_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2.4h { v0, v1 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i32 8 store i16* %tmp, i16** %ptr @@ -685,8 +932,12 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(i16* %A, i16** %ptr) { } define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) { -;CHECK-LABEL: test_v4i16_post_reg_ld2: -;CHECK: ld2.4h { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld2.4h { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -697,8 +948,11 @@ declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*) define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(i32* %A, i32** %ptr) { -;CHECK-LABEL: test_v4i32_post_imm_ld2: -;CHECK: ld2.4s { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v4i32_post_imm_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2.4s { v0, v1 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i32 8 store i32* %tmp, i32** %ptr @@ -706,8 +960,12 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(i32* %A, i32** %ptr) { } define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) { -;CHECK-LABEL: test_v4i32_post_reg_ld2: -;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld2.4s { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -718,8 +976,11 @@ declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*) define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(i32* %A, i32** %ptr) { -;CHECK-LABEL: test_v2i32_post_imm_ld2: -;CHECK: ld2.2s { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v2i32_post_imm_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2.2s { v0, v1 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i32 4 store i32* %tmp, i32** %ptr @@ -727,8 +988,12 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(i32* %A, i32** %ptr) { } define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2i32_post_reg_ld2: -;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld2.2s { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -739,8 +1004,11 @@ declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*) define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(i64* %A, i64** %ptr) { -;CHECK-LABEL: test_v2i64_post_imm_ld2: -;CHECK: ld2.2d { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v2i64_post_imm_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2.2d { v0, v1 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i32 4 store i64* %tmp, i64** %ptr @@ -748,8 +1016,12 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(i64* %A, i64** %ptr) { } define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2i64_post_reg_ld2: -;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld2.2d { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -760,8 +1032,11 @@ declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*) define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(i64* %A, i64** %ptr) { -;CHECK-LABEL: test_v1i64_post_imm_ld2: -;CHECK: ld1.1d { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v1i64_post_imm_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.1d { v0, v1 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i32 2 store i64* %tmp, i64** %ptr @@ -769,8 +1044,12 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(i64* %A, i64** %ptr) { } define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) { -;CHECK-LABEL: test_v1i64_post_reg_ld2: -;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.1d { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -781,8 +1060,11 @@ declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*) define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(float* %A, float** %ptr) { -;CHECK-LABEL: test_v4f32_post_imm_ld2: -;CHECK: ld2.4s { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v4f32_post_imm_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2.4s { v0, v1 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i32 8 store float* %tmp, float** %ptr @@ -790,8 +1072,12 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(float* %A, float** } define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) { -;CHECK-LABEL: test_v4f32_post_reg_ld2: -;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld2.4s { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -802,8 +1088,11 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float*) define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(float* %A, float** %ptr) { -;CHECK-LABEL: test_v2f32_post_imm_ld2: -;CHECK: ld2.2s { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v2f32_post_imm_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2.2s { v0, v1 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i32 4 store float* %tmp, float** %ptr @@ -811,8 +1100,12 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(float* %A, float** } define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2f32_post_reg_ld2: -;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld2.2s { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -823,8 +1116,11 @@ declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float*) define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(double* %A, double** %ptr) { -;CHECK-LABEL: test_v2f64_post_imm_ld2: -;CHECK: ld2.2d { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v2f64_post_imm_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2.2d { v0, v1 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i32 4 store double* %tmp, double** %ptr @@ -832,8 +1128,12 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(double* %A, doubl } define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2f64_post_reg_ld2: -;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld2.2d { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -844,8 +1144,11 @@ declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(double* %A, double** %ptr) { -;CHECK-LABEL: test_v1f64_post_imm_ld2: -;CHECK: ld1.1d { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v1f64_post_imm_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.1d { v0, v1 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i32 2 store double* %tmp, double** %ptr @@ -853,8 +1156,12 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(double* %A, doubl } define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) { -;CHECK-LABEL: test_v1f64_post_reg_ld2: -;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_ld2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.1d { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -865,8 +1172,11 @@ declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(i8* %A, i8** %ptr) { -;CHECK-LABEL: test_v16i8_post_imm_ld3: -;CHECK: ld3.16b { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v16i8_post_imm_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3.16b { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i32 48 store i8* %tmp, i8** %ptr @@ -874,8 +1184,11 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(i8* %A, i8** } define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) { -;CHECK-LABEL: test_v16i8_post_reg_ld3: -;CHECK: ld3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3.16b { v0, v1, v2 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -886,8 +1199,11 @@ declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(i8* %A, i8** %ptr) { -;CHECK-LABEL: test_v8i8_post_imm_ld3: -;CHECK: ld3.8b { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v8i8_post_imm_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3.8b { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i32 24 store i8* %tmp, i8** %ptr @@ -895,8 +1211,11 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(i8* %A, i8** %pt } define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) { -;CHECK-LABEL: test_v8i8_post_reg_ld3: -;CHECK: ld3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3.8b { v0, v1, v2 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -907,8 +1226,11 @@ declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*) define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(i16* %A, i16** %ptr) { -;CHECK-LABEL: test_v8i16_post_imm_ld3: -;CHECK: ld3.8h { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v8i16_post_imm_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3.8h { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i32 24 store i16* %tmp, i16** %ptr @@ -916,8 +1238,12 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(i16* %A, i16 } define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) { -;CHECK-LABEL: test_v8i16_post_reg_ld3: -;CHECK: ld3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld3.8h { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -928,8 +1254,11 @@ declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(i16* %A, i16** %ptr) { -;CHECK-LABEL: test_v4i16_post_imm_ld3: -;CHECK: ld3.4h { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v4i16_post_imm_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3.4h { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i32 12 store i16* %tmp, i16** %ptr @@ -937,8 +1266,12 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(i16* %A, i16 } define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) { -;CHECK-LABEL: test_v4i16_post_reg_ld3: -;CHECK: ld3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld3.4h { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -949,8 +1282,11 @@ declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(i32* %A, i32** %ptr) { -;CHECK-LABEL: test_v4i32_post_imm_ld3: -;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v4i32_post_imm_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3.4s { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i32 12 store i32* %tmp, i32** %ptr @@ -958,8 +1294,12 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(i32* %A, i32 } define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) { -;CHECK-LABEL: test_v4i32_post_reg_ld3: -;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld3.4s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -970,8 +1310,11 @@ declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(i32* %A, i32** %ptr) { -;CHECK-LABEL: test_v2i32_post_imm_ld3: -;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v2i32_post_imm_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3.2s { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i32 6 store i32* %tmp, i32** %ptr @@ -979,8 +1322,12 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(i32* %A, i32 } define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2i32_post_reg_ld3: -;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld3.2s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -991,8 +1338,11 @@ declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(i64* %A, i64** %ptr) { -;CHECK-LABEL: test_v2i64_post_imm_ld3: -;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v2i64_post_imm_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3.2d { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i32 6 store i64* %tmp, i64** %ptr @@ -1000,8 +1350,12 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(i64* %A, i64 } define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2i64_post_reg_ld3: -;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld3.2d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -1012,8 +1366,11 @@ declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(i64* %A, i64** %ptr) { -;CHECK-LABEL: test_v1i64_post_imm_ld3: -;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v1i64_post_imm_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i32 3 store i64* %tmp, i64** %ptr @@ -1021,8 +1378,12 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(i64* %A, i64 } define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) { -;CHECK-LABEL: test_v1i64_post_reg_ld3: -;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -1033,8 +1394,11 @@ declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(float* %A, float** %ptr) { -;CHECK-LABEL: test_v4f32_post_imm_ld3: -;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v4f32_post_imm_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3.4s { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i32 12 store float* %tmp, float** %ptr @@ -1042,8 +1406,12 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(float* } define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) { -;CHECK-LABEL: test_v4f32_post_reg_ld3: -;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld3.4s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -1054,8 +1422,11 @@ declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(float* %A, float** %ptr) { -;CHECK-LABEL: test_v2f32_post_imm_ld3: -;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v2f32_post_imm_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3.2s { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i32 6 store float* %tmp, float** %ptr @@ -1063,8 +1434,12 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(float* } define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2f32_post_reg_ld3: -;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld3.2s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -1075,8 +1450,11 @@ declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(double* %A, double** %ptr) { -;CHECK-LABEL: test_v2f64_post_imm_ld3: -;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v2f64_post_imm_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3.2d { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i32 6 store double* %tmp, double** %ptr @@ -1084,8 +1462,12 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(dou } define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2f64_post_reg_ld3: -;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld3.2d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -1096,8 +1478,11 @@ declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f6 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(double* %A, double** %ptr) { -;CHECK-LABEL: test_v1f64_post_imm_ld3: -;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v1f64_post_imm_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i32 3 store double* %tmp, double** %ptr @@ -1105,8 +1490,12 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(dou } define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) { -;CHECK-LABEL: test_v1f64_post_reg_ld3: -;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_ld3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -1117,8 +1506,11 @@ declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f6 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(i8* %A, i8** %ptr) { -;CHECK-LABEL: test_v16i8_post_imm_ld4: -;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v16i8_post_imm_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4.16b { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i32 64 store i8* %tmp, i8** %ptr @@ -1126,8 +1518,11 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(i } define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) { -;CHECK-LABEL: test_v16i8_post_reg_ld4: -;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4.16b { v0, v1, v2, v3 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -1138,8 +1533,11 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v1 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(i8* %A, i8** %ptr) { -;CHECK-LABEL: test_v8i8_post_imm_ld4: -;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v8i8_post_imm_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4.8b { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i32 32 store i8* %tmp, i8** %ptr @@ -1147,8 +1545,11 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(i8* %A } define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) { -;CHECK-LABEL: test_v8i8_post_reg_ld4: -;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4.8b { v0, v1, v2, v3 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -1159,8 +1560,11 @@ declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(i16* %A, i16** %ptr) { -;CHECK-LABEL: test_v8i16_post_imm_ld4: -;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v8i16_post_imm_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4.8h { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i32 32 store i16* %tmp, i16** %ptr @@ -1168,8 +1572,12 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(i } define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) { -;CHECK-LABEL: test_v8i16_post_reg_ld4: -;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld4.8h { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -1180,8 +1588,11 @@ declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(i16* %A, i16** %ptr) { -;CHECK-LABEL: test_v4i16_post_imm_ld4: -;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v4i16_post_imm_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4.4h { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i32 16 store i16* %tmp, i16** %ptr @@ -1189,8 +1600,12 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(i } define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) { -;CHECK-LABEL: test_v4i16_post_reg_ld4: -;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld4.4h { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -1201,8 +1616,11 @@ declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(i32* %A, i32** %ptr) { -;CHECK-LABEL: test_v4i32_post_imm_ld4: -;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v4i32_post_imm_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4.4s { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i32 16 store i32* %tmp, i32** %ptr @@ -1210,8 +1628,12 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(i } define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) { -;CHECK-LABEL: test_v4i32_post_reg_ld4: -;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld4.4s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -1222,8 +1644,11 @@ declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(i32* %A, i32** %ptr) { -;CHECK-LABEL: test_v2i32_post_imm_ld4: -;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v2i32_post_imm_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4.2s { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i32 8 store i32* %tmp, i32** %ptr @@ -1231,8 +1656,12 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(i } define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2i32_post_reg_ld4: -;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld4.2s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -1243,8 +1672,11 @@ declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(i64* %A, i64** %ptr) { -;CHECK-LABEL: test_v2i64_post_imm_ld4: -;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v2i64_post_imm_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4.2d { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i32 8 store i64* %tmp, i64** %ptr @@ -1252,8 +1684,12 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(i } define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2i64_post_reg_ld4: -;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld4.2d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -1264,8 +1700,11 @@ declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(i64* %A, i64** %ptr) { -;CHECK-LABEL: test_v1i64_post_imm_ld4: -;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v1i64_post_imm_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i32 4 store i64* %tmp, i64** %ptr @@ -1273,8 +1712,12 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(i } define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) { -;CHECK-LABEL: test_v1i64_post_reg_ld4: -;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -1285,8 +1728,11 @@ declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4(float* %A, float** %ptr) { -;CHECK-LABEL: test_v4f32_post_imm_ld4: -;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v4f32_post_imm_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4.4s { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i32 16 store float* %tmp, float** %ptr @@ -1294,8 +1740,12 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i } define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) { -;CHECK-LABEL: test_v4f32_post_reg_ld4: -;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld4.4s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -1306,8 +1756,11 @@ declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neo define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4(float* %A, float** %ptr) { -;CHECK-LABEL: test_v2f32_post_imm_ld4: -;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v2f32_post_imm_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4.2s { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i32 8 store float* %tmp, float** %ptr @@ -1315,8 +1768,12 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i } define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2f32_post_reg_ld4: -;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld4.2s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -1327,8 +1784,11 @@ declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neo define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4(double* %A, double** %ptr) { -;CHECK-LABEL: test_v2f64_post_imm_ld4: -;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v2f64_post_imm_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4.2d { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i32 8 store double* %tmp, double** %ptr @@ -1336,8 +1796,12 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po } define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2f64_post_reg_ld4: -;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld4.2d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -1348,8 +1812,11 @@ declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4(double* %A, double** %ptr) { -;CHECK-LABEL: test_v1f64_post_imm_ld4: -;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v1f64_post_imm_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i32 4 store double* %tmp, double** %ptr @@ -1357,8 +1824,12 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po } define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) { -;CHECK-LABEL: test_v1f64_post_reg_ld4: -;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_ld4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -1368,8 +1839,11 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double*) define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(i8* %A, i8** %ptr) { -;CHECK-LABEL: test_v16i8_post_imm_ld1x2: -;CHECK: ld1.16b { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v16i8_post_imm_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.16b { v0, v1 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i32 32 store i8* %tmp, i8** %ptr @@ -1377,8 +1851,11 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(i8* %A, i8** %ptr) { } define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) { -;CHECK-LABEL: test_v16i8_post_reg_ld1x2: -;CHECK: ld1.16b { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.16b { v0, v1 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -1389,8 +1866,11 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*) define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(i8* %A, i8** %ptr) { -;CHECK-LABEL: test_v8i8_post_imm_ld1x2: -;CHECK: ld1.8b { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v8i8_post_imm_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.8b { v0, v1 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i32 16 store i8* %tmp, i8** %ptr @@ -1398,8 +1878,11 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(i8* %A, i8** %ptr) { } define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) { -;CHECK-LABEL: test_v8i8_post_reg_ld1x2: -;CHECK: ld1.8b { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.8b { v0, v1 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -1410,8 +1893,11 @@ declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*) define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(i16* %A, i16** %ptr) { -;CHECK-LABEL: test_v8i16_post_imm_ld1x2: -;CHECK: ld1.8h { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v8i16_post_imm_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.8h { v0, v1 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i32 16 store i16* %tmp, i16** %ptr @@ -1419,8 +1905,12 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(i16* %A, i16** %ptr) } define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) { -;CHECK-LABEL: test_v8i16_post_reg_ld1x2: -;CHECK: ld1.8h { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld1.8h { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -1431,8 +1921,11 @@ declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*) define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(i16* %A, i16** %ptr) { -;CHECK-LABEL: test_v4i16_post_imm_ld1x2: -;CHECK: ld1.4h { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v4i16_post_imm_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.4h { v0, v1 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i32 8 store i16* %tmp, i16** %ptr @@ -1440,8 +1933,12 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(i16* %A, i16** %ptr) } define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) { -;CHECK-LABEL: test_v4i16_post_reg_ld1x2: -;CHECK: ld1.4h { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld1.4h { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -1452,8 +1949,11 @@ declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*) define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(i32* %A, i32** %ptr) { -;CHECK-LABEL: test_v4i32_post_imm_ld1x2: -;CHECK: ld1.4s { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v4i32_post_imm_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.4s { v0, v1 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i32 8 store i32* %tmp, i32** %ptr @@ -1461,8 +1961,12 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(i32* %A, i32** %ptr) } define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) { -;CHECK-LABEL: test_v4i32_post_reg_ld1x2: -;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1.4s { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -1473,8 +1977,11 @@ declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*) define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(i32* %A, i32** %ptr) { -;CHECK-LABEL: test_v2i32_post_imm_ld1x2: -;CHECK: ld1.2s { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v2i32_post_imm_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.2s { v0, v1 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i32 4 store i32* %tmp, i32** %ptr @@ -1482,8 +1989,12 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(i32* %A, i32** %ptr) } define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2i32_post_reg_ld1x2: -;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1.2s { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -1494,8 +2005,11 @@ declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*) define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(i64* %A, i64** %ptr) { -;CHECK-LABEL: test_v2i64_post_imm_ld1x2: -;CHECK: ld1.2d { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v2i64_post_imm_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.2d { v0, v1 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i32 4 store i64* %tmp, i64** %ptr @@ -1503,8 +2017,12 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(i64* %A, i64** %ptr) } define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2i64_post_reg_ld1x2: -;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.2d { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -1515,8 +2033,11 @@ declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*) define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(i64* %A, i64** %ptr) { -;CHECK-LABEL: test_v1i64_post_imm_ld1x2: -;CHECK: ld1.1d { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v1i64_post_imm_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.1d { v0, v1 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i32 2 store i64* %tmp, i64** %ptr @@ -1524,8 +2045,12 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(i64* %A, i64** %ptr) } define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) { -;CHECK-LABEL: test_v1i64_post_reg_ld1x2: -;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.1d { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -1536,8 +2061,11 @@ declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*) define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(float* %A, float** %ptr) { -;CHECK-LABEL: test_v4f32_post_imm_ld1x2: -;CHECK: ld1.4s { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v4f32_post_imm_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.4s { v0, v1 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i32 8 store float* %tmp, float** %ptr @@ -1545,8 +2073,12 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(float* %A, float* } define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) { -;CHECK-LABEL: test_v4f32_post_reg_ld1x2: -;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1.4s { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -1557,8 +2089,11 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(float* %A, float** %ptr) { -;CHECK-LABEL: test_v2f32_post_imm_ld1x2: -;CHECK: ld1.2s { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v2f32_post_imm_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.2s { v0, v1 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i32 4 store float* %tmp, float** %ptr @@ -1566,8 +2101,12 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(float* %A, float* } define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2f32_post_reg_ld1x2: -;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1.2s { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -1578,8 +2117,11 @@ declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(double* %A, double** %ptr) { -;CHECK-LABEL: test_v2f64_post_imm_ld1x2: -;CHECK: ld1.2d { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v2f64_post_imm_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.2d { v0, v1 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i32 4 store double* %tmp, double** %ptr @@ -1587,8 +2129,12 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(double* %A, dou } define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2f64_post_reg_ld1x2: -;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.2d { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -1599,8 +2145,11 @@ declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(doub define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(double* %A, double** %ptr) { -;CHECK-LABEL: test_v1f64_post_imm_ld1x2: -;CHECK: ld1.1d { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v1f64_post_imm_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.1d { v0, v1 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i32 2 store double* %tmp, double** %ptr @@ -1608,8 +2157,12 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(double* %A, dou } define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) { -;CHECK-LABEL: test_v1f64_post_reg_ld1x2: -;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_ld1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.1d { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -1620,8 +2173,11 @@ declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(doub define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(i8* %A, i8** %ptr) { -;CHECK-LABEL: test_v16i8_post_imm_ld1x3: -;CHECK: ld1.16b { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v16i8_post_imm_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.16b { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i32 48 store i8* %tmp, i8** %ptr @@ -1629,8 +2185,11 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(i8* %A, i8 } define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) { -;CHECK-LABEL: test_v16i8_post_reg_ld1x3: -;CHECK: ld1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.16b { v0, v1, v2 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -1641,8 +2200,11 @@ declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8( define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(i8* %A, i8** %ptr) { -;CHECK-LABEL: test_v8i8_post_imm_ld1x3: -;CHECK: ld1.8b { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v8i8_post_imm_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.8b { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i32 24 store i8* %tmp, i8** %ptr @@ -1650,8 +2212,11 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(i8* %A, i8** % } define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) { -;CHECK-LABEL: test_v8i8_post_reg_ld1x3: -;CHECK: ld1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.8b { v0, v1, v2 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -1662,8 +2227,11 @@ declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*) define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(i16* %A, i16** %ptr) { -;CHECK-LABEL: test_v8i16_post_imm_ld1x3: -;CHECK: ld1.8h { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v8i16_post_imm_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.8h { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i32 24 store i16* %tmp, i16** %ptr @@ -1671,8 +2239,12 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(i16* %A, i } define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) { -;CHECK-LABEL: test_v8i16_post_reg_ld1x3: -;CHECK: ld1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld1.8h { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -1683,8 +2255,11 @@ declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(i16* %A, i16** %ptr) { -;CHECK-LABEL: test_v4i16_post_imm_ld1x3: -;CHECK: ld1.4h { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v4i16_post_imm_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.4h { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i32 12 store i16* %tmp, i16** %ptr @@ -1692,8 +2267,12 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(i16* %A, i } define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) { -;CHECK-LABEL: test_v4i16_post_reg_ld1x3: -;CHECK: ld1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld1.4h { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -1704,8 +2283,11 @@ declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(i32* %A, i32** %ptr) { -;CHECK-LABEL: test_v4i32_post_imm_ld1x3: -;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v4i32_post_imm_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.4s { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i32 12 store i32* %tmp, i32** %ptr @@ -1713,8 +2295,12 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(i32* %A, i } define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) { -;CHECK-LABEL: test_v4i32_post_reg_ld1x3: -;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1.4s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -1725,8 +2311,11 @@ declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(i32* %A, i32** %ptr) { -;CHECK-LABEL: test_v2i32_post_imm_ld1x3: -;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v2i32_post_imm_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.2s { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i32 6 store i32* %tmp, i32** %ptr @@ -1734,8 +2323,12 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(i32* %A, i } define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2i32_post_reg_ld1x3: -;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1.2s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -1746,8 +2339,11 @@ declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(i64* %A, i64** %ptr) { -;CHECK-LABEL: test_v2i64_post_imm_ld1x3: -;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v2i64_post_imm_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.2d { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i32 6 store i64* %tmp, i64** %ptr @@ -1755,8 +2351,12 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(i64* %A, i } define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2i64_post_reg_ld1x3: -;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.2d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -1767,8 +2367,11 @@ declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(i64* %A, i64** %ptr) { -;CHECK-LABEL: test_v1i64_post_imm_ld1x3: -;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v1i64_post_imm_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i32 3 store i64* %tmp, i64** %ptr @@ -1776,8 +2379,12 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(i64* %A, i } define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) { -;CHECK-LABEL: test_v1i64_post_reg_ld1x3: -;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -1788,8 +2395,11 @@ declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(float* %A, float** %ptr) { -;CHECK-LABEL: test_v4f32_post_imm_ld1x3: -;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v4f32_post_imm_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.4s { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i32 12 store float* %tmp, float** %ptr @@ -1797,8 +2407,12 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(floa } define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) { -;CHECK-LABEL: test_v4f32_post_reg_ld1x3: -;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1.4s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -1809,8 +2423,11 @@ declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(float* %A, float** %ptr) { -;CHECK-LABEL: test_v2f32_post_imm_ld1x3: -;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v2f32_post_imm_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.2s { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i32 6 store float* %tmp, float** %ptr @@ -1818,8 +2435,12 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(floa } define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2f32_post_reg_ld1x3: -;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1.2s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -1830,8 +2451,11 @@ declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(double* %A, double** %ptr) { -;CHECK-LABEL: test_v2f64_post_imm_ld1x3: -;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v2f64_post_imm_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.2d { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i32 6 store double* %tmp, double** %ptr @@ -1839,8 +2463,12 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(d } define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2f64_post_reg_ld1x3: -;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.2d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -1851,8 +2479,11 @@ declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(double* %A, double** %ptr) { -;CHECK-LABEL: test_v1f64_post_imm_ld1x3: -;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v1f64_post_imm_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i32 3 store double* %tmp, double** %ptr @@ -1860,8 +2491,12 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(d } define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) { -;CHECK-LABEL: test_v1f64_post_reg_ld1x3: -;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_ld1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -1872,8 +2507,11 @@ declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4(i8* %A, i8** %ptr) { -;CHECK-LABEL: test_v16i8_post_imm_ld1x4: -;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v16i8_post_imm_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.16b { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i32 64 store i8* %tmp, i8** %ptr @@ -1881,8 +2519,11 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4 } define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) { -;CHECK-LABEL: test_v16i8_post_reg_ld1x4: -;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.16b { v0, v1, v2, v3 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -1893,8 +2534,11 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4. define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(i8* %A, i8** %ptr) { -;CHECK-LABEL: test_v8i8_post_imm_ld1x4: -;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v8i8_post_imm_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.8b { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i32 32 store i8* %tmp, i8** %ptr @@ -1902,8 +2546,11 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(i8* } define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) { -;CHECK-LABEL: test_v8i8_post_reg_ld1x4: -;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.8b { v0, v1, v2, v3 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -1914,8 +2561,11 @@ declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4(i16* %A, i16** %ptr) { -;CHECK-LABEL: test_v8i16_post_imm_ld1x4: -;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v8i16_post_imm_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.8h { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i32 32 store i16* %tmp, i16** %ptr @@ -1923,8 +2573,12 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4 } define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) { -;CHECK-LABEL: test_v8i16_post_reg_ld1x4: -;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld1.8h { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -1935,8 +2589,11 @@ declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4. define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4(i16* %A, i16** %ptr) { -;CHECK-LABEL: test_v4i16_post_imm_ld1x4: -;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v4i16_post_imm_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.4h { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i32 16 store i16* %tmp, i16** %ptr @@ -1944,8 +2601,12 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4 } define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) { -;CHECK-LABEL: test_v4i16_post_reg_ld1x4: -;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld1.4h { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -1956,8 +2617,11 @@ declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4. define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4(i32* %A, i32** %ptr) { -;CHECK-LABEL: test_v4i32_post_imm_ld1x4: -;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v4i32_post_imm_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.4s { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i32 16 store i32* %tmp, i32** %ptr @@ -1965,8 +2629,12 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4 } define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) { -;CHECK-LABEL: test_v4i32_post_reg_ld1x4: -;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1.4s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -1977,8 +2645,11 @@ declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4. define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4(i32* %A, i32** %ptr) { -;CHECK-LABEL: test_v2i32_post_imm_ld1x4: -;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v2i32_post_imm_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.2s { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i32 8 store i32* %tmp, i32** %ptr @@ -1986,8 +2657,12 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4 } define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2i32_post_reg_ld1x4: -;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1.2s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -1998,8 +2673,11 @@ declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4. define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4(i64* %A, i64** %ptr) { -;CHECK-LABEL: test_v2i64_post_imm_ld1x4: -;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v2i64_post_imm_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.2d { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i32 8 store i64* %tmp, i64** %ptr @@ -2007,8 +2685,12 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4 } define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2i64_post_reg_ld1x4: -;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.2d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -2019,8 +2701,11 @@ declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4. define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4(i64* %A, i64** %ptr) { -;CHECK-LABEL: test_v1i64_post_imm_ld1x4: -;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v1i64_post_imm_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i32 4 store i64* %tmp, i64** %ptr @@ -2028,8 +2713,12 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4 } define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) { -;CHECK-LABEL: test_v1i64_post_reg_ld1x4: -;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -2040,8 +2729,11 @@ declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4. define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x4(float* %A, float** %ptr) { -;CHECK-LABEL: test_v4f32_post_imm_ld1x4: -;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v4f32_post_imm_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.4s { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i32 16 store float* %tmp, float** %ptr @@ -2049,8 +2741,12 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i } define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) { -;CHECK-LABEL: test_v4f32_post_reg_ld1x4: -;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1.4s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -2061,8 +2757,11 @@ declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neo define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x4(float* %A, float** %ptr) { -;CHECK-LABEL: test_v2f32_post_imm_ld1x4: -;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v2f32_post_imm_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.2s { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i32 8 store float* %tmp, float** %ptr @@ -2070,8 +2769,12 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i } define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2f32_post_reg_ld1x4: -;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1.2s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -2082,8 +2785,11 @@ declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neo define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x4(double* %A, double** %ptr) { -;CHECK-LABEL: test_v2f64_post_imm_ld1x4: -;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v2f64_post_imm_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.2d { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i32 8 store double* %tmp, double** %ptr @@ -2091,8 +2797,12 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po } define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) { -;CHECK-LABEL: test_v2f64_post_reg_ld1x4: -;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.2d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -2103,8 +2813,11 @@ declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x4(double* %A, double** %ptr) { -;CHECK-LABEL: test_v1f64_post_imm_ld1x4: -;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v1f64_post_imm_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i32 4 store double* %tmp, double** %ptr @@ -2112,8 +2825,12 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po } define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) { -;CHECK-LABEL: test_v1f64_post_reg_ld1x4: -;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_ld1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -2124,8 +2841,11 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind { -;CHECK-LABEL: test_v16i8_post_imm_ld2r: -;CHECK: ld2r.16b { v0, v1 }, [x0], #2 +; CHECK-LABEL: test_v16i8_post_imm_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2r.16b { v0, v1 }, [x0], #2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i32 2 store i8* %tmp, i8** %ptr @@ -2133,8 +2853,11 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(i8* %A, i8** %ptr) nou } define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v16i8_post_reg_ld2r: -;CHECK: ld2r.16b { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2r.16b { v0, v1 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -2145,8 +2868,11 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8*) nounwin define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind { -;CHECK-LABEL: test_v8i8_post_imm_ld2r: -;CHECK: ld2r.8b { v0, v1 }, [x0], #2 +; CHECK-LABEL: test_v8i8_post_imm_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2r.8b { v0, v1 }, [x0], #2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i32 2 store i8* %tmp, i8** %ptr @@ -2154,8 +2880,11 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwi } define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i8_post_reg_ld2r: -;CHECK: ld2r.8b { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2r.8b { v0, v1 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -2166,8 +2895,11 @@ declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8*) nounwind r define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind { -;CHECK-LABEL: test_v8i16_post_imm_ld2r: -;CHECK: ld2r.8h { v0, v1 }, [x0], #4 +; CHECK-LABEL: test_v8i16_post_imm_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2r.8h { v0, v1 }, [x0], #4 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i32 2 store i16* %tmp, i16** %ptr @@ -2175,8 +2907,12 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(i16* %A, i16** %ptr) n } define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i16_post_reg_ld2r: -;CHECK: ld2r.8h { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld2r.8h { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -2187,8 +2923,11 @@ declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16*) nounw define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind { -;CHECK-LABEL: test_v4i16_post_imm_ld2r: -;CHECK: ld2r.4h { v0, v1 }, [x0], #4 +; CHECK-LABEL: test_v4i16_post_imm_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2r.4h { v0, v1 }, [x0], #4 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i32 2 store i16* %tmp, i16** %ptr @@ -2196,8 +2935,12 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(i16* %A, i16** %ptr) n } define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i16_post_reg_ld2r: -;CHECK: ld2r.4h { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld2r.4h { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -2208,8 +2951,11 @@ declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16*) nounw define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind { -;CHECK-LABEL: test_v4i32_post_imm_ld2r: -;CHECK: ld2r.4s { v0, v1 }, [x0], #8 +; CHECK-LABEL: test_v4i32_post_imm_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2r.4s { v0, v1 }, [x0], #8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i32 2 store i32* %tmp, i32** %ptr @@ -2217,8 +2963,12 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(i32* %A, i32** %ptr) n } define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i32_post_reg_ld2r: -;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld2r.4s { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -2228,8 +2978,11 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2r(i32* %A, i32** %ptr, i declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind { -;CHECK-LABEL: test_v2i32_post_imm_ld2r: -;CHECK: ld2r.2s { v0, v1 }, [x0], #8 +; CHECK-LABEL: test_v2i32_post_imm_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2r.2s { v0, v1 }, [x0], #8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i32 2 store i32* %tmp, i32** %ptr @@ -2237,8 +2990,12 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(i32* %A, i32** %ptr) n } define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i32_post_reg_ld2r: -;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld2r.2s { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -2249,8 +3006,11 @@ declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32*) nounw define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind { -;CHECK-LABEL: test_v2i64_post_imm_ld2r: -;CHECK: ld2r.2d { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v2i64_post_imm_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2r.2d { v0, v1 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i32 2 store i64* %tmp, i64** %ptr @@ -2258,8 +3018,12 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(i64* %A, i64** %ptr) n } define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i64_post_reg_ld2r: -;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld2r.2d { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -2269,8 +3033,11 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2r(i64* %A, i64** %ptr, i declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind { -;CHECK-LABEL: test_v1i64_post_imm_ld2r: -;CHECK: ld2r.1d { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v1i64_post_imm_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2r.1d { v0, v1 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i32 2 store i64* %tmp, i64** %ptr @@ -2278,8 +3045,12 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(i64* %A, i64** %ptr) n } define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v1i64_post_reg_ld2r: -;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld2r.1d { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -2290,8 +3061,11 @@ declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64*) nounw define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(float* %A, float** %ptr) nounwind { -;CHECK-LABEL: test_v4f32_post_imm_ld2r: -;CHECK: ld2r.4s { v0, v1 }, [x0], #8 +; CHECK-LABEL: test_v4f32_post_imm_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2r.4s { v0, v1 }, [x0], #8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i32 2 store float* %tmp, float** %ptr @@ -2299,8 +3073,12 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(float* %A, float** } define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v4f32_post_reg_ld2r: -;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld2r.4s { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -2310,8 +3088,11 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2r(float* %A, float** declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float*) nounwind readonly define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(float* %A, float** %ptr) nounwind { -;CHECK-LABEL: test_v2f32_post_imm_ld2r: -;CHECK: ld2r.2s { v0, v1 }, [x0], #8 +; CHECK-LABEL: test_v2f32_post_imm_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2r.2s { v0, v1 }, [x0], #8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i32 2 store float* %tmp, float** %ptr @@ -2319,8 +3100,12 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(float* %A, float** } define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f32_post_reg_ld2r: -;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld2r.2s { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -2331,8 +3116,11 @@ declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float*) define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(double* %A, double** %ptr) nounwind { -;CHECK-LABEL: test_v2f64_post_imm_ld2r: -;CHECK: ld2r.2d { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v2f64_post_imm_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2r.2d { v0, v1 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i32 2 store double* %tmp, double** %ptr @@ -2340,8 +3128,12 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(double* %A, doub } define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f64_post_reg_ld2r: -;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld2r.2d { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -2351,8 +3143,11 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2r(double* %A, doub declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double*) nounwind readonly define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(double* %A, double** %ptr) nounwind { -;CHECK-LABEL: test_v1f64_post_imm_ld2r: -;CHECK: ld2r.1d { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v1f64_post_imm_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld2r.1d { v0, v1 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i32 2 store double* %tmp, double** %ptr @@ -2360,8 +3155,12 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(double* %A, doub } define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v1f64_post_reg_ld2r: -;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_ld2r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld2r.1d { v0, v1 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -2372,8 +3171,11 @@ declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(doubl define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind { -;CHECK-LABEL: test_v16i8_post_imm_ld3r: -;CHECK: ld3r.16b { v0, v1, v2 }, [x0], #3 +; CHECK-LABEL: test_v16i8_post_imm_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3r.16b { v0, v1, v2 }, [x0], #3 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i32 3 store i8* %tmp, i8** %ptr @@ -2381,8 +3183,11 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(i8* %A, i8* } define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v16i8_post_reg_ld3r: -;CHECK: ld3r.16b { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3r.16b { v0, v1, v2 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -2393,8 +3198,11 @@ declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind { -;CHECK-LABEL: test_v8i8_post_imm_ld3r: -;CHECK: ld3r.8b { v0, v1, v2 }, [x0], #3 +; CHECK-LABEL: test_v8i8_post_imm_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3r.8b { v0, v1, v2 }, [x0], #3 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i32 3 store i8* %tmp, i8** %ptr @@ -2402,8 +3210,11 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(i8* %A, i8** %p } define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i8_post_reg_ld3r: -;CHECK: ld3r.8b { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3r.8b { v0, v1, v2 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -2414,8 +3225,11 @@ declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8*) define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind { -;CHECK-LABEL: test_v8i16_post_imm_ld3r: -;CHECK: ld3r.8h { v0, v1, v2 }, [x0], #6 +; CHECK-LABEL: test_v8i16_post_imm_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3r.8h { v0, v1, v2 }, [x0], #6 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i32 3 store i16* %tmp, i16** %ptr @@ -2423,8 +3237,12 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3r(i16* %A, i1 } define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i16_post_reg_ld3r: -;CHECK: ld3r.8h { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld3r.8h { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -2435,8 +3253,11 @@ declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16( define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind { -;CHECK-LABEL: test_v4i16_post_imm_ld3r: -;CHECK: ld3r.4h { v0, v1, v2 }, [x0], #6 +; CHECK-LABEL: test_v4i16_post_imm_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3r.4h { v0, v1, v2 }, [x0], #6 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i32 3 store i16* %tmp, i16** %ptr @@ -2444,8 +3265,12 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(i16* %A, i1 } define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i16_post_reg_ld3r: -;CHECK: ld3r.4h { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld3r.4h { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -2456,8 +3281,11 @@ declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16( define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind { -;CHECK-LABEL: test_v4i32_post_imm_ld3r: -;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12 +; CHECK-LABEL: test_v4i32_post_imm_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3r.4s { v0, v1, v2 }, [x0], #12 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i32 3 store i32* %tmp, i32** %ptr @@ -2465,8 +3293,12 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(i32* %A, i3 } define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i32_post_reg_ld3r: -;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld3r.4s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -2476,8 +3308,11 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3r(i32* %A, i3 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind { -;CHECK-LABEL: test_v2i32_post_imm_ld3r: -;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12 +; CHECK-LABEL: test_v2i32_post_imm_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3r.2s { v0, v1, v2 }, [x0], #12 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i32 3 store i32* %tmp, i32** %ptr @@ -2485,8 +3320,12 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(i32* %A, i3 } define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i32_post_reg_ld3r: -;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld3r.2s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -2497,8 +3336,11 @@ declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32( define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind { -;CHECK-LABEL: test_v2i64_post_imm_ld3r: -;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v2i64_post_imm_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3r.2d { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i32 3 store i64* %tmp, i64** %ptr @@ -2506,8 +3348,12 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(i64* %A, i6 } define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i64_post_reg_ld3r: -;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld3r.2d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -2517,8 +3363,11 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3r(i64* %A, i6 declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind { -;CHECK-LABEL: test_v1i64_post_imm_ld3r: -;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v1i64_post_imm_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3r.1d { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i32 3 store i64* %tmp, i64** %ptr @@ -2526,8 +3375,12 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(i64* %A, i6 } define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v1i64_post_reg_ld3r: -;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld3r.1d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -2538,8 +3391,11 @@ declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64( define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(float* %A, float** %ptr) nounwind { -;CHECK-LABEL: test_v4f32_post_imm_ld3r: -;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12 +; CHECK-LABEL: test_v4f32_post_imm_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3r.4s { v0, v1, v2 }, [x0], #12 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i32 3 store float* %tmp, float** %ptr @@ -2547,8 +3403,12 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(float } define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v4f32_post_reg_ld3r: -;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld3r.4s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -2558,8 +3418,11 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3r(float declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float*) nounwind readonly define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(float* %A, float** %ptr) nounwind { -;CHECK-LABEL: test_v2f32_post_imm_ld3r: -;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12 +; CHECK-LABEL: test_v2f32_post_imm_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3r.2s { v0, v1, v2 }, [x0], #12 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i32 3 store float* %tmp, float** %ptr @@ -2567,8 +3430,12 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(float } define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f32_post_reg_ld3r: -;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld3r.2s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -2579,8 +3446,11 @@ declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32. define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(double* %A, double** %ptr) nounwind { -;CHECK-LABEL: test_v2f64_post_imm_ld3r: -;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v2f64_post_imm_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3r.2d { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i32 3 store double* %tmp, double** %ptr @@ -2588,8 +3458,12 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(do } define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f64_post_reg_ld3r: -;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld3r.2d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -2599,8 +3473,11 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3r(do declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double*) nounwind readonly define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(double* %A, double** %ptr) nounwind { -;CHECK-LABEL: test_v1f64_post_imm_ld3r: -;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v1f64_post_imm_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld3r.1d { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i32 3 store double* %tmp, double** %ptr @@ -2608,8 +3485,12 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(do } define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v1f64_post_reg_ld3r: -;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_ld3r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld3r.1d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -2620,8 +3501,11 @@ declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind { -;CHECK-LABEL: test_v16i8_post_imm_ld4r: -;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], #4 +; CHECK-LABEL: test_v16i8_post_imm_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4r.16b { v0, v1, v2, v3 }, [x0], #4 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i32 4 store i8* %tmp, i8** %ptr @@ -2629,8 +3513,11 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r( } define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v16i8_post_reg_ld4r: -;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4r.16b { v0, v1, v2, v3 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -2641,8 +3528,11 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind { -;CHECK-LABEL: test_v8i8_post_imm_ld4r: -;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], #4 +; CHECK-LABEL: test_v8i8_post_imm_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4r.8b { v0, v1, v2, v3 }, [x0], #4 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i32 4 store i8* %tmp, i8** %ptr @@ -2650,8 +3540,11 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(i8* % } define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i8_post_reg_ld4r: -;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4r.8b { v0, v1, v2, v3 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -2662,8 +3555,11 @@ declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8. define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind { -;CHECK-LABEL: test_v8i16_post_imm_ld4r: -;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], #8 +; CHECK-LABEL: test_v8i16_post_imm_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4r.8h { v0, v1, v2, v3 }, [x0], #8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i32 4 store i16* %tmp, i16** %ptr @@ -2671,8 +3567,12 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r( } define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i16_post_reg_ld4r: -;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld4r.8h { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -2683,8 +3583,11 @@ declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind { -;CHECK-LABEL: test_v4i16_post_imm_ld4r: -;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], #8 +; CHECK-LABEL: test_v4i16_post_imm_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4r.4h { v0, v1, v2, v3 }, [x0], #8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i32 4 store i16* %tmp, i16** %ptr @@ -2692,8 +3595,12 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r( } define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i16_post_reg_ld4r: -;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld4r.4h { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -2704,8 +3611,11 @@ declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind { -;CHECK-LABEL: test_v4i32_post_imm_ld4r: -;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16 +; CHECK-LABEL: test_v4i32_post_imm_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4r.4s { v0, v1, v2, v3 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i32 4 store i32* %tmp, i32** %ptr @@ -2713,8 +3623,12 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r( } define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i32_post_reg_ld4r: -;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld4r.4s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -2724,8 +3638,11 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4r( declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind { -;CHECK-LABEL: test_v2i32_post_imm_ld4r: -;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16 +; CHECK-LABEL: test_v2i32_post_imm_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4r.2s { v0, v1, v2, v3 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i32 4 store i32* %tmp, i32** %ptr @@ -2733,8 +3650,12 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r( } define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i32_post_reg_ld4r: -;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld4r.2s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -2745,8 +3666,11 @@ declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind { -;CHECK-LABEL: test_v2i64_post_imm_ld4r: -;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v2i64_post_imm_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4r.2d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i32 4 store i64* %tmp, i64** %ptr @@ -2754,8 +3678,12 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4r( } define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i64_post_reg_ld4r: -;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld4r.2d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -2765,8 +3693,11 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4r( declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind { -;CHECK-LABEL: test_v1i64_post_imm_ld4r: -;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v1i64_post_imm_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4r.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i32 4 store i64* %tmp, i64** %ptr @@ -2774,8 +3705,12 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r( } define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v1i64_post_reg_ld4r: -;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld4r.1d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -2786,8 +3721,11 @@ declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4r(float* %A, float** %ptr) nounwind { -;CHECK-LABEL: test_v4f32_post_imm_ld4r: -;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16 +; CHECK-LABEL: test_v4f32_post_imm_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4r.4s { v0, v1, v2, v3 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i32 4 store float* %tmp, float** %ptr @@ -2795,8 +3733,12 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i } define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v4f32_post_reg_ld4r: -;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld4r.4s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -2806,8 +3748,11 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_r declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float*) nounwind readonly define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4r(float* %A, float** %ptr) nounwind { -;CHECK-LABEL: test_v2f32_post_imm_ld4r: -;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16 +; CHECK-LABEL: test_v2f32_post_imm_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4r.2s { v0, v1, v2, v3 }, [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i32 4 store float* %tmp, float** %ptr @@ -2815,8 +3760,12 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i } define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f32_post_reg_ld4r: -;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld4r.2s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -2827,8 +3776,11 @@ declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neo define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4r(double* %A, double** %ptr) nounwind { -;CHECK-LABEL: test_v2f64_post_imm_ld4r: -;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v2f64_post_imm_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4r.2d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i32 4 store double* %tmp, double** %ptr @@ -2836,8 +3788,12 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po } define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f64_post_reg_ld4r: -;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld4r.2d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -2847,8 +3803,11 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double*) nounwind readonly define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4r(double* %A, double** %ptr) nounwind { -;CHECK-LABEL: test_v1f64_post_imm_ld4r: -;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v1f64_post_imm_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: ld4r.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i32 4 store double* %tmp, double** %ptr @@ -2856,8 +3815,12 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po } define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind { -;CHECK-LABEL: test_v1f64_post_reg_ld4r: -;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_ld4r: +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld4r.1d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -2868,8 +3831,13 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { -;CHECK-LABEL: test_v16i8_post_imm_ld2lane: -;CHECK: ld2.b { v0, v1 }[0], [x0], #2 +; CHECK-LABEL: test_v16i8_post_imm_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.b { v0, v1 }[0], [x0], #2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i32 2 store i8* %tmp, i8** %ptr @@ -2877,8 +3845,13 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(i8* %A, i8** %ptr, } define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind { -;CHECK-LABEL: test_v16i8_post_reg_ld2lane: -;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.b { v0, v1 }[0], [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -2889,8 +3862,13 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { -;CHECK-LABEL: test_v8i8_post_imm_ld2lane: -;CHECK: ld2.b { v0, v1 }[0], [x0], #2 +; CHECK-LABEL: test_v8i8_post_imm_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.b { v0, v1 }[0], [x0], #2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i32 2 store i8* %tmp, i8** %ptr @@ -2898,8 +3876,13 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(i8* %A, i8** %ptr, <8 } define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind { -;CHECK-LABEL: test_v8i8_post_reg_ld2lane: -;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.b { v0, v1 }[0], [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -2910,8 +3893,13 @@ declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8>, <8 define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { -;CHECK-LABEL: test_v8i16_post_imm_ld2lane: -;CHECK: ld2.h { v0, v1 }[0], [x0], #4 +; CHECK-LABEL: test_v8i16_post_imm_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.h { v0, v1 }[0], [x0], #4 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i32 2 store i16* %tmp, i16** %ptr @@ -2919,8 +3907,14 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(i16* %A, i16** %ptr } define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind { -;CHECK-LABEL: test_v8i16_post_reg_ld2lane: -;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.h { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -2931,8 +3925,13 @@ declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16 define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { -;CHECK-LABEL: test_v4i16_post_imm_ld2lane: -;CHECK: ld2.h { v0, v1 }[0], [x0], #4 +; CHECK-LABEL: test_v4i16_post_imm_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.h { v0, v1 }[0], [x0], #4 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i32 2 store i16* %tmp, i16** %ptr @@ -2940,8 +3939,14 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(i16* %A, i16** %ptr } define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind { -;CHECK-LABEL: test_v4i16_post_reg_ld2lane: -;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.h { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -2952,8 +3957,13 @@ declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16 define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { -;CHECK-LABEL: test_v4i32_post_imm_ld2lane: -;CHECK: ld2.s { v0, v1 }[0], [x0], #8 +; CHECK-LABEL: test_v4i32_post_imm_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], #8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i32 2 store i32* %tmp, i32** %ptr @@ -2961,8 +3971,14 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(i32* %A, i32** %ptr } define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind { -;CHECK-LABEL: test_v4i32_post_reg_ld2lane: -;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -2973,8 +3989,13 @@ declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32 define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { -;CHECK-LABEL: test_v2i32_post_imm_ld2lane: -;CHECK: ld2.s { v0, v1 }[0], [x0], #8 +; CHECK-LABEL: test_v2i32_post_imm_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], #8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i32 2 store i32* %tmp, i32** %ptr @@ -2982,8 +4003,14 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(i32* %A, i32** %ptr } define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind { -;CHECK-LABEL: test_v2i32_post_reg_ld2lane: -;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -2994,8 +4021,13 @@ declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32 define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { -;CHECK-LABEL: test_v2i64_post_imm_ld2lane: -;CHECK: ld2.d { v0, v1 }[0], [x0], #16 +; CHECK-LABEL: test_v2i64_post_imm_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i32 2 store i64* %tmp, i64** %ptr @@ -3003,8 +4035,14 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(i64* %A, i64** %ptr } define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind { -;CHECK-LABEL: test_v2i64_post_reg_ld2lane: -;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -3015,8 +4053,13 @@ declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64 define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { -;CHECK-LABEL: test_v1i64_post_imm_ld2lane: -;CHECK: ld2.d { v0, v1 }[0], [x0], #16 +; CHECK-LABEL: test_v1i64_post_imm_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i32 2 store i64* %tmp, i64** %ptr @@ -3024,8 +4067,14 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(i64* %A, i64** %ptr } define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind { -;CHECK-LABEL: test_v1i64_post_reg_ld2lane: -;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -3036,8 +4085,13 @@ declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64 define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind { -;CHECK-LABEL: test_v4f32_post_imm_ld2lane: -;CHECK: ld2.s { v0, v1 }[0], [x0], #8 +; CHECK-LABEL: test_v4f32_post_imm_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], #8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) %tmp = getelementptr float, float* %A, i32 2 store float* %tmp, float** %ptr @@ -3045,8 +4099,14 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(float* %A, floa } define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind { -;CHECK-LABEL: test_v4f32_post_reg_ld2lane: -;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -3057,8 +4117,13 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind { -;CHECK-LABEL: test_v2f32_post_imm_ld2lane: -;CHECK: ld2.s { v0, v1 }[0], [x0], #8 +; CHECK-LABEL: test_v2f32_post_imm_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], #8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) %tmp = getelementptr float, float* %A, i32 2 store float* %tmp, float** %ptr @@ -3066,8 +4131,14 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(float* %A, floa } define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind { -;CHECK-LABEL: test_v2f32_post_reg_ld2lane: -;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -3078,8 +4149,13 @@ declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind { -;CHECK-LABEL: test_v2f64_post_imm_ld2lane: -;CHECK: ld2.d { v0, v1 }[0], [x0], #16 +; CHECK-LABEL: test_v2f64_post_imm_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) %tmp = getelementptr double, double* %A, i32 2 store double* %tmp, double** %ptr @@ -3087,8 +4163,14 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(double* %A, d } define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind { -;CHECK-LABEL: test_v2f64_post_reg_ld2lane: -;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -3099,8 +4181,13 @@ declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind { -;CHECK-LABEL: test_v1f64_post_imm_ld2lane: -;CHECK: ld2.d { v0, v1 }[0], [x0], #16 +; CHECK-LABEL: test_v1f64_post_imm_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) %tmp = getelementptr double, double* %A, i32 2 store double* %tmp, double** %ptr @@ -3108,8 +4195,14 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(double* %A, d } define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind { -;CHECK-LABEL: test_v1f64_post_reg_ld2lane: -;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_ld2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -3120,8 +4213,14 @@ declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { -;CHECK-LABEL: test_v16i8_post_imm_ld3lane: -;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3 +; CHECK-LABEL: test_v16i8_post_imm_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.b { v0, v1, v2 }[0], [x0], #3 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i32 3 store i8* %tmp, i8** %ptr @@ -3129,8 +4228,14 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(i8* %A, } define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { -;CHECK-LABEL: test_v16i8_post_reg_ld3lane: -;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.b { v0, v1, v2 }[0], [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -3141,8 +4246,14 @@ declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { -;CHECK-LABEL: test_v8i8_post_imm_ld3lane: -;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3 +; CHECK-LABEL: test_v8i8_post_imm_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.b { v0, v1, v2 }[0], [x0], #3 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i32 3 store i8* %tmp, i8** %ptr @@ -3150,8 +4261,14 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(i8* %A, i8** } define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { -;CHECK-LABEL: test_v8i8_post_reg_ld3lane: -;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.b { v0, v1, v2 }[0], [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -3162,8 +4279,14 @@ declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { -;CHECK-LABEL: test_v8i16_post_imm_ld3lane: -;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6 +; CHECK-LABEL: test_v8i16_post_imm_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.h { v0, v1, v2 }[0], [x0], #6 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i32 3 store i16* %tmp, i16** %ptr @@ -3171,8 +4294,15 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(i16* %A, } define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { -;CHECK-LABEL: test_v8i16_post_reg_ld3lane: -;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.h { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -3183,8 +4313,14 @@ declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { -;CHECK-LABEL: test_v4i16_post_imm_ld3lane: -;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6 +; CHECK-LABEL: test_v4i16_post_imm_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.h { v0, v1, v2 }[0], [x0], #6 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i32 3 store i16* %tmp, i16** %ptr @@ -3192,8 +4328,15 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(i16* %A, } define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { -;CHECK-LABEL: test_v4i16_post_reg_ld3lane: -;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.h { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -3204,8 +4347,14 @@ declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { -;CHECK-LABEL: test_v4i32_post_imm_ld3lane: -;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12 +; CHECK-LABEL: test_v4i32_post_imm_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i32 3 store i32* %tmp, i32** %ptr @@ -3213,8 +4362,15 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(i32* %A, } define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { -;CHECK-LABEL: test_v4i32_post_reg_ld3lane: -;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -3225,8 +4381,14 @@ declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { -;CHECK-LABEL: test_v2i32_post_imm_ld3lane: -;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12 +; CHECK-LABEL: test_v2i32_post_imm_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i32 3 store i32* %tmp, i32** %ptr @@ -3234,8 +4396,15 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(i32* %A, } define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { -;CHECK-LABEL: test_v2i32_post_reg_ld3lane: -;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -3246,8 +4415,14 @@ declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { -;CHECK-LABEL: test_v2i64_post_imm_ld3lane: -;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24 +; CHECK-LABEL: test_v2i64_post_imm_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i32 3 store i64* %tmp, i64** %ptr @@ -3255,8 +4430,15 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(i64* %A, } define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { -;CHECK-LABEL: test_v2i64_post_reg_ld3lane: -;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -3267,8 +4449,14 @@ declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { -;CHECK-LABEL: test_v1i64_post_imm_ld3lane: -;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24 +; CHECK-LABEL: test_v1i64_post_imm_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i32 3 store i64* %tmp, i64** %ptr @@ -3276,8 +4464,15 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(i64* %A, } define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { -;CHECK-LABEL: test_v1i64_post_reg_ld3lane: -;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -3288,8 +4483,14 @@ declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { -;CHECK-LABEL: test_v4f32_post_imm_ld3lane: -;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12 +; CHECK-LABEL: test_v4f32_post_imm_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) %tmp = getelementptr float, float* %A, i32 3 store float* %tmp, float** %ptr @@ -3297,8 +4498,15 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(fl } define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { -;CHECK-LABEL: test_v4f32_post_reg_ld3lane: -;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -3309,8 +4517,14 @@ declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { -;CHECK-LABEL: test_v2f32_post_imm_ld3lane: -;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12 +; CHECK-LABEL: test_v2f32_post_imm_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) %tmp = getelementptr float, float* %A, i32 3 store float* %tmp, float** %ptr @@ -3318,8 +4532,15 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(fl } define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { -;CHECK-LABEL: test_v2f32_post_reg_ld3lane: -;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -3330,8 +4551,14 @@ declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { -;CHECK-LABEL: test_v2f64_post_imm_ld3lane: -;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24 +; CHECK-LABEL: test_v2f64_post_imm_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) %tmp = getelementptr double, double* %A, i32 3 store double* %tmp, double** %ptr @@ -3339,8 +4566,15 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane } define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { -;CHECK-LABEL: test_v2f64_post_reg_ld3lane: -;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -3351,8 +4585,14 @@ declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane. define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { -;CHECK-LABEL: test_v1f64_post_imm_ld3lane: -;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24 +; CHECK-LABEL: test_v1f64_post_imm_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) %tmp = getelementptr double, double* %A, i32 3 store double* %tmp, double** %ptr @@ -3360,8 +4600,15 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane } define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { -;CHECK-LABEL: test_v1f64_post_reg_ld3lane: -;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_ld3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -3372,8 +4619,15 @@ declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane. define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { -;CHECK-LABEL: test_v16i8_post_imm_ld4lane: -;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4 +; CHECK-LABEL: test_v16i8_post_imm_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], #4 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i32 4 store i8* %tmp, i8** %ptr @@ -3381,8 +4635,15 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4la } define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { -;CHECK-LABEL: test_v16i8_post_reg_ld4lane: -;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -3393,8 +4654,15 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lan define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { -;CHECK-LABEL: test_v8i8_post_imm_ld4lane: -;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4 +; CHECK-LABEL: test_v8i8_post_imm_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], #4 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i32 4 store i8* %tmp, i8** %ptr @@ -3402,8 +4670,15 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(i8 } define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { -;CHECK-LABEL: test_v8i8_post_reg_ld4lane: -;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc store i8* %tmp, i8** %ptr @@ -3414,8 +4689,15 @@ declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { -;CHECK-LABEL: test_v8i16_post_imm_ld4lane: -;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8 +; CHECK-LABEL: test_v8i16_post_imm_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], #8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i32 4 store i16* %tmp, i16** %ptr @@ -3423,8 +4705,16 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4la } define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { -;CHECK-LABEL: test_v8i16_post_reg_ld4lane: -;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -3435,8 +4725,15 @@ declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lan define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { -;CHECK-LABEL: test_v4i16_post_imm_ld4lane: -;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8 +; CHECK-LABEL: test_v4i16_post_imm_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], #8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i32 4 store i16* %tmp, i16** %ptr @@ -3444,8 +4741,16 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4la } define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { -;CHECK-LABEL: test_v4i16_post_reg_ld4lane: -;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc store i16* %tmp, i16** %ptr @@ -3456,8 +4761,15 @@ declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lan define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { -;CHECK-LABEL: test_v4i32_post_imm_ld4lane: -;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 +; CHECK-LABEL: test_v4i32_post_imm_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i32 4 store i32* %tmp, i32** %ptr @@ -3465,8 +4777,16 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4la } define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { -;CHECK-LABEL: test_v4i32_post_reg_ld4lane: -;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -3477,8 +4797,15 @@ declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lan define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { -;CHECK-LABEL: test_v2i32_post_imm_ld4lane: -;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 +; CHECK-LABEL: test_v2i32_post_imm_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i32 4 store i32* %tmp, i32** %ptr @@ -3486,8 +4813,16 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4la } define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { -;CHECK-LABEL: test_v2i32_post_reg_ld4lane: -;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc store i32* %tmp, i32** %ptr @@ -3498,8 +4833,15 @@ declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lan define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { -;CHECK-LABEL: test_v2i64_post_imm_ld4lane: -;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 +; CHECK-LABEL: test_v2i64_post_imm_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i32 4 store i64* %tmp, i64** %ptr @@ -3507,8 +4849,16 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4la } define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { -;CHECK-LABEL: test_v2i64_post_reg_ld4lane: -;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -3519,8 +4869,15 @@ declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lan define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { -;CHECK-LABEL: test_v1i64_post_imm_ld4lane: -;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 +; CHECK-LABEL: test_v1i64_post_imm_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i32 4 store i64* %tmp, i64** %ptr @@ -3528,8 +4885,16 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4la } define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { -;CHECK-LABEL: test_v1i64_post_reg_ld4lane: -;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc store i64* %tmp, i64** %ptr @@ -3540,8 +4905,15 @@ declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lan define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { -;CHECK-LABEL: test_v4f32_post_imm_ld4lane: -;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 +; CHECK-LABEL: test_v4f32_post_imm_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) %tmp = getelementptr float, float* %A, i32 4 store float* %tmp, float** %ptr @@ -3549,8 +4921,16 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i } define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { -;CHECK-LABEL: test_v4f32_post_reg_ld4lane: -;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -3561,8 +4941,15 @@ declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neo define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { -;CHECK-LABEL: test_v2f32_post_imm_ld4lane: -;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 +; CHECK-LABEL: test_v2f32_post_imm_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) %tmp = getelementptr float, float* %A, i32 4 store float* %tmp, float** %ptr @@ -3570,8 +4957,16 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i } define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { -;CHECK-LABEL: test_v2f32_post_reg_ld4lane: -;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) %tmp = getelementptr float, float* %A, i64 %inc store float* %tmp, float** %ptr @@ -3582,8 +4977,15 @@ declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neo define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { -;CHECK-LABEL: test_v2f64_post_imm_ld4lane: -;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 +; CHECK-LABEL: test_v2f64_post_imm_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) %tmp = getelementptr double, double* %A, i32 4 store double* %tmp, double** %ptr @@ -3591,8 +4993,16 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po } define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { -;CHECK-LABEL: test_v2f64_post_reg_ld4lane: -;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -3603,8 +5013,15 @@ declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { -;CHECK-LABEL: test_v1f64_post_imm_ld4lane: -;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 +; CHECK-LABEL: test_v1f64_post_imm_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) %tmp = getelementptr double, double* %A, i32 4 store double* %tmp, double** %ptr @@ -3612,8 +5029,16 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po } define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { -;CHECK-LABEL: test_v1f64_post_reg_ld4lane: -;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_ld4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) %tmp = getelementptr double, double* %A, i64 %inc store double* %tmp, double** %ptr @@ -3624,16 +5049,24 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64 define i8* @test_v16i8_post_imm_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { -;CHECK-LABEL: test_v16i8_post_imm_st2: -;CHECK: st2.16b { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v16i8_post_imm_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.16b { v0, v1 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) %tmp = getelementptr i8, i8* %A, i32 32 ret i8* %tmp } define i8* @test_v16i8_post_reg_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v16i8_post_reg_st2: -;CHECK: st2.16b { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.16b { v0, v1 }, [x0], x2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc ret i8* %tmp @@ -3643,16 +5076,24 @@ declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) define i8* @test_v8i8_post_imm_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { -;CHECK-LABEL: test_v8i8_post_imm_st2: -;CHECK: st2.8b { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v8i8_post_imm_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st2.8b { v0, v1 }, [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) %tmp = getelementptr i8, i8* %A, i32 16 ret i8* %tmp } define i8* @test_v8i8_post_reg_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i8_post_reg_st2: -;CHECK: st2.8b { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st2.8b { v0, v1 }, [x0], x2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc ret i8* %tmp @@ -3662,16 +5103,25 @@ declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) define i16* @test_v8i16_post_imm_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { -;CHECK-LABEL: test_v8i16_post_imm_st2: -;CHECK: st2.8h { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v8i16_post_imm_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.8h { v0, v1 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) %tmp = getelementptr i16, i16* %A, i32 16 ret i16* %tmp } define i16* @test_v8i16_post_reg_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i16_post_reg_st2: -;CHECK: st2.8h { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.8h { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc ret i16* %tmp @@ -3681,16 +5131,25 @@ declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) define i16* @test_v4i16_post_imm_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { -;CHECK-LABEL: test_v4i16_post_imm_st2: -;CHECK: st2.4h { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v4i16_post_imm_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st2.4h { v0, v1 }, [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) %tmp = getelementptr i16, i16* %A, i32 8 ret i16* %tmp } define i16* @test_v4i16_post_reg_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i16_post_reg_st2: -;CHECK: st2.4h { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st2.4h { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc ret i16* %tmp @@ -3700,16 +5159,25 @@ declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) define i32* @test_v4i32_post_imm_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { -;CHECK-LABEL: test_v4i32_post_imm_st2: -;CHECK: st2.4s { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v4i32_post_imm_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.4s { v0, v1 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) %tmp = getelementptr i32, i32* %A, i32 8 ret i32* %tmp } define i32* @test_v4i32_post_reg_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i32_post_reg_st2: -;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.4s { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc ret i32* %tmp @@ -3719,16 +5187,25 @@ declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) define i32* @test_v2i32_post_imm_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { -;CHECK-LABEL: test_v2i32_post_imm_st2: -;CHECK: st2.2s { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v2i32_post_imm_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st2.2s { v0, v1 }, [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) %tmp = getelementptr i32, i32* %A, i32 4 ret i32* %tmp } define i32* @test_v2i32_post_reg_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i32_post_reg_st2: -;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st2.2s { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc ret i32* %tmp @@ -3738,16 +5215,25 @@ declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) define i64* @test_v2i64_post_imm_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { -;CHECK-LABEL: test_v2i64_post_imm_st2: -;CHECK: st2.2d { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v2i64_post_imm_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.2d { v0, v1 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) %tmp = getelementptr i64, i64* %A, i64 4 ret i64* %tmp } define i64* @test_v2i64_post_reg_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i64_post_reg_st2: -;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.2d { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc ret i64* %tmp @@ -3757,16 +5243,25 @@ declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) define i64* @test_v1i64_post_imm_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { -;CHECK-LABEL: test_v1i64_post_imm_st2: -;CHECK: st1.1d { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v1i64_post_imm_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st1.1d { v0, v1 }, [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) %tmp = getelementptr i64, i64* %A, i64 2 ret i64* %tmp } define i64* @test_v1i64_post_reg_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v1i64_post_reg_st2: -;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st1.1d { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc ret i64* %tmp @@ -3776,16 +5271,25 @@ declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) define float* @test_v4f32_post_imm_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind { -;CHECK-LABEL: test_v4f32_post_imm_st2: -;CHECK: st2.4s { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v4f32_post_imm_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.4s { v0, v1 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) %tmp = getelementptr float, float* %A, i32 8 ret float* %tmp } define float* @test_v4f32_post_reg_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v4f32_post_reg_st2: -;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.4s { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) %tmp = getelementptr float, float* %A, i64 %inc ret float* %tmp @@ -3795,16 +5299,25 @@ declare void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float>, <4 x float>, float* define float* @test_v2f32_post_imm_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind { -;CHECK-LABEL: test_v2f32_post_imm_st2: -;CHECK: st2.2s { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v2f32_post_imm_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st2.2s { v0, v1 }, [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) %tmp = getelementptr float, float* %A, i32 4 ret float* %tmp } define float* @test_v2f32_post_reg_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f32_post_reg_st2: -;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st2.2s { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) %tmp = getelementptr float, float* %A, i64 %inc ret float* %tmp @@ -3814,16 +5327,25 @@ declare void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float>, <2 x float>, float* define double* @test_v2f64_post_imm_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind { -;CHECK-LABEL: test_v2f64_post_imm_st2: -;CHECK: st2.2d { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v2f64_post_imm_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.2d { v0, v1 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) %tmp = getelementptr double, double* %A, i64 4 ret double* %tmp } define double* @test_v2f64_post_reg_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f64_post_reg_st2: -;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.2d { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) %tmp = getelementptr double, double* %A, i64 %inc ret double* %tmp @@ -3833,16 +5355,25 @@ declare void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double>, <2 x double>, doub define double* @test_v1f64_post_imm_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind { -;CHECK-LABEL: test_v1f64_post_imm_st2: -;CHECK: st1.1d { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v1f64_post_imm_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st1.1d { v0, v1 }, [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) %tmp = getelementptr double, double* %A, i64 2 ret double* %tmp } define double* @test_v1f64_post_reg_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v1f64_post_reg_st2: -;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_st2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st1.1d { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) %tmp = getelementptr double, double* %A, i64 %inc ret double* %tmp @@ -3852,16 +5383,26 @@ declare void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double>, <1 x double>, doub define i8* @test_v16i8_post_imm_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { -;CHECK-LABEL: test_v16i8_post_imm_st3: -;CHECK: st3.16b { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v16i8_post_imm_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.16b { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) %tmp = getelementptr i8, i8* %A, i32 48 ret i8* %tmp } define i8* @test_v16i8_post_reg_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v16i8_post_reg_st3: -;CHECK: st3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.16b { v0, v1, v2 }, [x0], x2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc ret i8* %tmp @@ -3871,16 +5412,26 @@ declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, define i8* @test_v8i8_post_imm_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { -;CHECK-LABEL: test_v8i8_post_imm_st3: -;CHECK: st3.8b { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v8i8_post_imm_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st3.8b { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) %tmp = getelementptr i8, i8* %A, i32 24 ret i8* %tmp } define i8* @test_v8i8_post_reg_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i8_post_reg_st3: -;CHECK: st3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st3.8b { v0, v1, v2 }, [x0], x2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc ret i8* %tmp @@ -3890,16 +5441,27 @@ declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) define i16* @test_v8i16_post_imm_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { -;CHECK-LABEL: test_v8i16_post_imm_st3: -;CHECK: st3.8h { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v8i16_post_imm_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.8h { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) %tmp = getelementptr i16, i16* %A, i32 24 ret i16* %tmp } define i16* @test_v8i16_post_reg_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i16_post_reg_st3: -;CHECK: st3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.8h { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc ret i16* %tmp @@ -3909,16 +5471,27 @@ declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, define i16* @test_v4i16_post_imm_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { -;CHECK-LABEL: test_v4i16_post_imm_st3: -;CHECK: st3.4h { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v4i16_post_imm_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st3.4h { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) %tmp = getelementptr i16, i16* %A, i32 12 ret i16* %tmp } define i16* @test_v4i16_post_reg_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i16_post_reg_st3: -;CHECK: st3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st3.4h { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc ret i16* %tmp @@ -3928,16 +5501,27 @@ declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, define i32* @test_v4i32_post_imm_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { -;CHECK-LABEL: test_v4i32_post_imm_st3: -;CHECK: st3.4s { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v4i32_post_imm_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.4s { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) %tmp = getelementptr i32, i32* %A, i32 12 ret i32* %tmp } define i32* @test_v4i32_post_reg_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i32_post_reg_st3: -;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.4s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc ret i32* %tmp @@ -3947,16 +5531,27 @@ declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, define i32* @test_v2i32_post_imm_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { -;CHECK-LABEL: test_v2i32_post_imm_st3: -;CHECK: st3.2s { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v2i32_post_imm_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st3.2s { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) %tmp = getelementptr i32, i32* %A, i32 6 ret i32* %tmp } define i32* @test_v2i32_post_reg_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i32_post_reg_st3: -;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st3.2s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc ret i32* %tmp @@ -3966,16 +5561,27 @@ declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, define i64* @test_v2i64_post_imm_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { -;CHECK-LABEL: test_v2i64_post_imm_st3: -;CHECK: st3.2d { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v2i64_post_imm_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.2d { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A) %tmp = getelementptr i64, i64* %A, i64 6 ret i64* %tmp } define i64* @test_v2i64_post_reg_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i64_post_reg_st3: -;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.2d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc ret i64* %tmp @@ -3985,16 +5591,27 @@ declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, define i64* @test_v1i64_post_imm_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { -;CHECK-LABEL: test_v1i64_post_imm_st3: -;CHECK: st1.1d { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v1i64_post_imm_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) %tmp = getelementptr i64, i64* %A, i64 3 ret i64* %tmp } define i64* @test_v1i64_post_reg_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v1i64_post_reg_st3: -;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc ret i64* %tmp @@ -4004,16 +5621,27 @@ declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, define float* @test_v4f32_post_imm_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { -;CHECK-LABEL: test_v4f32_post_imm_st3: -;CHECK: st3.4s { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v4f32_post_imm_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.4s { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) %tmp = getelementptr float, float* %A, i32 12 ret float* %tmp } define float* @test_v4f32_post_reg_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v4f32_post_reg_st3: -;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.4s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) %tmp = getelementptr float, float* %A, i64 %inc ret float* %tmp @@ -4023,16 +5651,27 @@ declare void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x f define float* @test_v2f32_post_imm_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { -;CHECK-LABEL: test_v2f32_post_imm_st3: -;CHECK: st3.2s { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v2f32_post_imm_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st3.2s { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A) %tmp = getelementptr float, float* %A, i32 6 ret float* %tmp } define float* @test_v2f32_post_reg_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f32_post_reg_st3: -;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st3.2s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A) %tmp = getelementptr float, float* %A, i64 %inc ret float* %tmp @@ -4042,16 +5681,27 @@ declare void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x f define double* @test_v2f64_post_imm_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { -;CHECK-LABEL: test_v2f64_post_imm_st3: -;CHECK: st3.2d { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v2f64_post_imm_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.2d { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) %tmp = getelementptr double, double* %A, i64 6 ret double* %tmp } define double* @test_v2f64_post_reg_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f64_post_reg_st3: -;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.2d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) %tmp = getelementptr double, double* %A, i64 %inc ret double* %tmp @@ -4061,16 +5711,27 @@ declare void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x define double* @test_v1f64_post_imm_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { -;CHECK-LABEL: test_v1f64_post_imm_st3: -;CHECK: st1.1d { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v1f64_post_imm_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) %tmp = getelementptr double, double* %A, i64 3 ret double* %tmp } define double* @test_v1f64_post_reg_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v1f64_post_reg_st3: -;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_st3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) %tmp = getelementptr double, double* %A, i64 %inc ret double* %tmp @@ -4080,16 +5741,28 @@ declare void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x define i8* @test_v16i8_post_imm_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { -;CHECK-LABEL: test_v16i8_post_imm_st4: -;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v16i8_post_imm_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.16b { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) %tmp = getelementptr i8, i8* %A, i32 64 ret i8* %tmp } define i8* @test_v16i8_post_reg_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v16i8_post_reg_st4: -;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.16b { v0, v1, v2, v3 }, [x0], x2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc ret i8* %tmp @@ -4099,16 +5772,28 @@ declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, define i8* @test_v8i8_post_imm_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { -;CHECK-LABEL: test_v8i8_post_imm_st4: -;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v8i8_post_imm_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st4.8b { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) %tmp = getelementptr i8, i8* %A, i32 32 ret i8* %tmp } define i8* @test_v8i8_post_reg_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i8_post_reg_st4: -;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st4.8b { v0, v1, v2, v3 }, [x0], x2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc ret i8* %tmp @@ -4118,16 +5803,29 @@ declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x define i16* @test_v8i16_post_imm_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { -;CHECK-LABEL: test_v8i16_post_imm_st4: -;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v8i16_post_imm_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.8h { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) %tmp = getelementptr i16, i16* %A, i32 32 ret i16* %tmp } define i16* @test_v8i16_post_reg_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i16_post_reg_st4: -;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.8h { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc ret i16* %tmp @@ -4137,16 +5835,29 @@ declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, define i16* @test_v4i16_post_imm_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { -;CHECK-LABEL: test_v4i16_post_imm_st4: -;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v4i16_post_imm_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st4.4h { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) %tmp = getelementptr i16, i16* %A, i32 16 ret i16* %tmp } define i16* @test_v4i16_post_reg_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i16_post_reg_st4: -;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st4.4h { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc ret i16* %tmp @@ -4156,16 +5867,29 @@ declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, define i32* @test_v4i32_post_imm_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { -;CHECK-LABEL: test_v4i32_post_imm_st4: -;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v4i32_post_imm_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) %tmp = getelementptr i32, i32* %A, i32 16 ret i32* %tmp } define i32* @test_v4i32_post_reg_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i32_post_reg_st4: -;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc ret i32* %tmp @@ -4175,16 +5899,29 @@ declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, define i32* @test_v2i32_post_imm_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { -;CHECK-LABEL: test_v2i32_post_imm_st4: -;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v2i32_post_imm_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) %tmp = getelementptr i32, i32* %A, i32 8 ret i32* %tmp } define i32* @test_v2i32_post_reg_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i32_post_reg_st4: -;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc ret i32* %tmp @@ -4194,16 +5931,29 @@ declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, define i64* @test_v2i64_post_imm_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { -;CHECK-LABEL: test_v2i64_post_imm_st4: -;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v2i64_post_imm_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) %tmp = getelementptr i64, i64* %A, i64 8 ret i64* %tmp } define i64* @test_v2i64_post_reg_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i64_post_reg_st4: -;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc ret i64* %tmp @@ -4213,16 +5963,29 @@ declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, define i64* @test_v1i64_post_imm_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { -;CHECK-LABEL: test_v1i64_post_imm_st4: -;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v1i64_post_imm_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A) %tmp = getelementptr i64, i64* %A, i64 4 ret i64* %tmp } define i64* @test_v1i64_post_reg_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v1i64_post_reg_st4: -;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc ret i64* %tmp @@ -4232,16 +5995,29 @@ declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, define float* @test_v4f32_post_imm_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { -;CHECK-LABEL: test_v4f32_post_imm_st4: -;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v4f32_post_imm_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) %tmp = getelementptr float, float* %A, i32 16 ret float* %tmp } define float* @test_v4f32_post_reg_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v4f32_post_reg_st4: -;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) %tmp = getelementptr float, float* %A, i64 %inc ret float* %tmp @@ -4251,16 +6027,29 @@ declare void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x f define float* @test_v2f32_post_imm_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { -;CHECK-LABEL: test_v2f32_post_imm_st4: -;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v2f32_post_imm_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) %tmp = getelementptr float, float* %A, i32 8 ret float* %tmp } define float* @test_v2f32_post_reg_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f32_post_reg_st4: -;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) %tmp = getelementptr float, float* %A, i64 %inc ret float* %tmp @@ -4270,16 +6059,29 @@ declare void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x f define double* @test_v2f64_post_imm_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { -;CHECK-LABEL: test_v2f64_post_imm_st4: -;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v2f64_post_imm_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A) %tmp = getelementptr double, double* %A, i64 8 ret double* %tmp } define double* @test_v2f64_post_reg_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f64_post_reg_st4: -;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A) %tmp = getelementptr double, double* %A, i64 %inc ret double* %tmp @@ -4289,16 +6091,29 @@ declare void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x define double* @test_v1f64_post_imm_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { -;CHECK-LABEL: test_v1f64_post_imm_st4: -;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v1f64_post_imm_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A) %tmp = getelementptr double, double* %A, i64 4 ret double* %tmp } define double* @test_v1f64_post_reg_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v1f64_post_reg_st4: -;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_st4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A) %tmp = getelementptr double, double* %A, i64 %inc ret double* %tmp @@ -4308,16 +6123,24 @@ declare void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x define i8* @test_v16i8_post_imm_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { -;CHECK-LABEL: test_v16i8_post_imm_st1x2: -;CHECK: st1.16b { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v16i8_post_imm_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st1.16b { v0, v1 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) %tmp = getelementptr i8, i8* %A, i32 32 ret i8* %tmp } define i8* @test_v16i8_post_reg_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v16i8_post_reg_st1x2: -;CHECK: st1.16b { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st1.16b { v0, v1 }, [x0], x2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc ret i8* %tmp @@ -4327,16 +6150,24 @@ declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) define i8* @test_v8i8_post_imm_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { -;CHECK-LABEL: test_v8i8_post_imm_st1x2: -;CHECK: st1.8b { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v8i8_post_imm_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st1.8b { v0, v1 }, [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) %tmp = getelementptr i8, i8* %A, i32 16 ret i8* %tmp } define i8* @test_v8i8_post_reg_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i8_post_reg_st1x2: -;CHECK: st1.8b { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st1.8b { v0, v1 }, [x0], x2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc ret i8* %tmp @@ -4346,16 +6177,25 @@ declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) define i16* @test_v8i16_post_imm_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { -;CHECK-LABEL: test_v8i16_post_imm_st1x2: -;CHECK: st1.8h { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v8i16_post_imm_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st1.8h { v0, v1 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) %tmp = getelementptr i16, i16* %A, i32 16 ret i16* %tmp } define i16* @test_v8i16_post_reg_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i16_post_reg_st1x2: -;CHECK: st1.8h { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st1.8h { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc ret i16* %tmp @@ -4365,16 +6205,25 @@ declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) define i16* @test_v4i16_post_imm_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { -;CHECK-LABEL: test_v4i16_post_imm_st1x2: -;CHECK: st1.4h { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v4i16_post_imm_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st1.4h { v0, v1 }, [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) %tmp = getelementptr i16, i16* %A, i32 8 ret i16* %tmp } define i16* @test_v4i16_post_reg_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i16_post_reg_st1x2: -;CHECK: st1.4h { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st1.4h { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc ret i16* %tmp @@ -4384,16 +6233,25 @@ declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) define i32* @test_v4i32_post_imm_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { -;CHECK-LABEL: test_v4i32_post_imm_st1x2: -;CHECK: st1.4s { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v4i32_post_imm_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st1.4s { v0, v1 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) %tmp = getelementptr i32, i32* %A, i32 8 ret i32* %tmp } define i32* @test_v4i32_post_reg_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i32_post_reg_st1x2: -;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st1.4s { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc ret i32* %tmp @@ -4403,16 +6261,25 @@ declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) define i32* @test_v2i32_post_imm_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { -;CHECK-LABEL: test_v2i32_post_imm_st1x2: -;CHECK: st1.2s { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v2i32_post_imm_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st1.2s { v0, v1 }, [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) %tmp = getelementptr i32, i32* %A, i32 4 ret i32* %tmp } define i32* @test_v2i32_post_reg_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i32_post_reg_st1x2: -;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st1.2s { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc ret i32* %tmp @@ -4422,16 +6289,25 @@ declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) define i64* @test_v2i64_post_imm_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { -;CHECK-LABEL: test_v2i64_post_imm_st1x2: -;CHECK: st1.2d { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v2i64_post_imm_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st1.2d { v0, v1 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) %tmp = getelementptr i64, i64* %A, i64 4 ret i64* %tmp } define i64* @test_v2i64_post_reg_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i64_post_reg_st1x2: -;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st1.2d { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc ret i64* %tmp @@ -4441,16 +6317,25 @@ declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) define i64* @test_v1i64_post_imm_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { -;CHECK-LABEL: test_v1i64_post_imm_st1x2: -;CHECK: st1.1d { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v1i64_post_imm_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st1.1d { v0, v1 }, [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) %tmp = getelementptr i64, i64* %A, i64 2 ret i64* %tmp } define i64* @test_v1i64_post_reg_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v1i64_post_reg_st1x2: -;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st1.1d { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc ret i64* %tmp @@ -4460,16 +6345,25 @@ declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) define float* @test_v4f32_post_imm_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind { -;CHECK-LABEL: test_v4f32_post_imm_st1x2: -;CHECK: st1.4s { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v4f32_post_imm_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st1.4s { v0, v1 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) %tmp = getelementptr float, float* %A, i32 8 ret float* %tmp } define float* @test_v4f32_post_reg_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v4f32_post_reg_st1x2: -;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st1.4s { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) %tmp = getelementptr float, float* %A, i64 %inc ret float* %tmp @@ -4479,16 +6373,25 @@ declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, floa define float* @test_v2f32_post_imm_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind { -;CHECK-LABEL: test_v2f32_post_imm_st1x2: -;CHECK: st1.2s { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v2f32_post_imm_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st1.2s { v0, v1 }, [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) %tmp = getelementptr float, float* %A, i32 4 ret float* %tmp } define float* @test_v2f32_post_reg_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f32_post_reg_st1x2: -;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st1.2s { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) %tmp = getelementptr float, float* %A, i64 %inc ret float* %tmp @@ -4498,16 +6401,25 @@ declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, floa define double* @test_v2f64_post_imm_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind { -;CHECK-LABEL: test_v2f64_post_imm_st1x2: -;CHECK: st1.2d { v0, v1 }, [x0], #32 +; CHECK-LABEL: test_v2f64_post_imm_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st1.2d { v0, v1 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) %tmp = getelementptr double, double* %A, i64 4 ret double* %tmp } define double* @test_v2f64_post_reg_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f64_post_reg_st1x2: -;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st1.2d { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) %tmp = getelementptr double, double* %A, i64 %inc ret double* %tmp @@ -4517,16 +6429,25 @@ declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, do define double* @test_v1f64_post_imm_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind { -;CHECK-LABEL: test_v1f64_post_imm_st1x2: -;CHECK: st1.1d { v0, v1 }, [x0], #16 +; CHECK-LABEL: test_v1f64_post_imm_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st1.1d { v0, v1 }, [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) %tmp = getelementptr double, double* %A, i64 2 ret double* %tmp } define double* @test_v1f64_post_reg_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v1f64_post_reg_st1x2: -;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_st1x2: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-NEXT: st1.1d { v0, v1 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) %tmp = getelementptr double, double* %A, i64 %inc ret double* %tmp @@ -4536,16 +6457,26 @@ declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, do define i8* @test_v16i8_post_imm_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { -;CHECK-LABEL: test_v16i8_post_imm_st1x3: -;CHECK: st1.16b { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v16i8_post_imm_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st1.16b { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) %tmp = getelementptr i8, i8* %A, i32 48 ret i8* %tmp } define i8* @test_v16i8_post_reg_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v16i8_post_reg_st1x3: -;CHECK: st1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st1.16b { v0, v1, v2 }, [x0], x2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc ret i8* %tmp @@ -4555,16 +6486,26 @@ declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8> define i8* @test_v8i8_post_imm_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { -;CHECK-LABEL: test_v8i8_post_imm_st1x3: -;CHECK: st1.8b { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v8i8_post_imm_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st1.8b { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) %tmp = getelementptr i8, i8* %A, i32 24 ret i8* %tmp } define i8* @test_v8i8_post_reg_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i8_post_reg_st1x3: -;CHECK: st1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st1.8b { v0, v1, v2 }, [x0], x2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc ret i8* %tmp @@ -4574,16 +6515,27 @@ declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8 define i16* @test_v8i16_post_imm_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { -;CHECK-LABEL: test_v8i16_post_imm_st1x3: -;CHECK: st1.8h { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v8i16_post_imm_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st1.8h { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) %tmp = getelementptr i16, i16* %A, i32 24 ret i16* %tmp } define i16* @test_v8i16_post_reg_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i16_post_reg_st1x3: -;CHECK: st1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st1.8h { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc ret i16* %tmp @@ -4593,16 +6545,27 @@ declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16 define i16* @test_v4i16_post_imm_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { -;CHECK-LABEL: test_v4i16_post_imm_st1x3: -;CHECK: st1.4h { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v4i16_post_imm_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st1.4h { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) %tmp = getelementptr i16, i16* %A, i32 12 ret i16* %tmp } define i16* @test_v4i16_post_reg_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i16_post_reg_st1x3: -;CHECK: st1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st1.4h { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc ret i16* %tmp @@ -4612,16 +6575,27 @@ declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16 define i32* @test_v4i32_post_imm_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { -;CHECK-LABEL: test_v4i32_post_imm_st1x3: -;CHECK: st1.4s { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v4i32_post_imm_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st1.4s { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) %tmp = getelementptr i32, i32* %A, i32 12 ret i32* %tmp } define i32* @test_v4i32_post_reg_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i32_post_reg_st1x3: -;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st1.4s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc ret i32* %tmp @@ -4631,16 +6605,27 @@ declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32 define i32* @test_v2i32_post_imm_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { -;CHECK-LABEL: test_v2i32_post_imm_st1x3: -;CHECK: st1.2s { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v2i32_post_imm_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st1.2s { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) %tmp = getelementptr i32, i32* %A, i32 6 ret i32* %tmp } define i32* @test_v2i32_post_reg_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i32_post_reg_st1x3: -;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st1.2s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc ret i32* %tmp @@ -4650,16 +6635,27 @@ declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32 define i64* @test_v2i64_post_imm_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { -;CHECK-LABEL: test_v2i64_post_imm_st1x3: -;CHECK: st1.2d { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v2i64_post_imm_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st1.2d { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A) %tmp = getelementptr i64, i64* %A, i64 6 ret i64* %tmp } define i64* @test_v2i64_post_reg_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i64_post_reg_st1x3: -;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st1.2d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc ret i64* %tmp @@ -4669,16 +6665,27 @@ declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64 define i64* @test_v1i64_post_imm_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { -;CHECK-LABEL: test_v1i64_post_imm_st1x3: -;CHECK: st1.1d { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v1i64_post_imm_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) %tmp = getelementptr i64, i64* %A, i64 3 ret i64* %tmp } define i64* @test_v1i64_post_reg_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v1i64_post_reg_st1x3: -;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc ret i64* %tmp @@ -4688,16 +6695,27 @@ declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64 define float* @test_v4f32_post_imm_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { -;CHECK-LABEL: test_v4f32_post_imm_st1x3: -;CHECK: st1.4s { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v4f32_post_imm_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st1.4s { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) %tmp = getelementptr float, float* %A, i32 12 ret float* %tmp } define float* @test_v4f32_post_reg_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v4f32_post_reg_st1x3: -;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st1.4s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) %tmp = getelementptr float, float* %A, i64 %inc ret float* %tmp @@ -4707,16 +6725,27 @@ declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x define float* @test_v2f32_post_imm_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { -;CHECK-LABEL: test_v2f32_post_imm_st1x3: -;CHECK: st1.2s { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v2f32_post_imm_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st1.2s { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A) %tmp = getelementptr float, float* %A, i32 6 ret float* %tmp } define float* @test_v2f32_post_reg_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f32_post_reg_st1x3: -;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st1.2s { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A) %tmp = getelementptr float, float* %A, i64 %inc ret float* %tmp @@ -4726,16 +6755,27 @@ declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x define double* @test_v2f64_post_imm_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { -;CHECK-LABEL: test_v2f64_post_imm_st1x3: -;CHECK: st1.2d { v0, v1, v2 }, [x0], #48 +; CHECK-LABEL: test_v2f64_post_imm_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st1.2d { v0, v1, v2 }, [x0], #48 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) %tmp = getelementptr double, double* %A, i64 6 ret double* %tmp } define double* @test_v2f64_post_reg_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f64_post_reg_st1x3: -;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st1.2d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) %tmp = getelementptr double, double* %A, i64 %inc ret double* %tmp @@ -4745,16 +6785,27 @@ declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 define double* @test_v1f64_post_imm_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { -;CHECK-LABEL: test_v1f64_post_imm_st1x3: -;CHECK: st1.1d { v0, v1, v2 }, [x0], #24 +; CHECK-LABEL: test_v1f64_post_imm_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) %tmp = getelementptr double, double* %A, i64 3 ret double* %tmp } define double* @test_v1f64_post_reg_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v1f64_post_reg_st1x3: -;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_st1x3: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) %tmp = getelementptr double, double* %A, i64 %inc ret double* %tmp @@ -4764,16 +6815,28 @@ declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 define i8* @test_v16i8_post_imm_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { -;CHECK-LABEL: test_v16i8_post_imm_st1x4: -;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v16i8_post_imm_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st1.16b { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) %tmp = getelementptr i8, i8* %A, i32 64 ret i8* %tmp } define i8* @test_v16i8_post_reg_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v16i8_post_reg_st1x4: -;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st1.16b { v0, v1, v2, v3 }, [x0], x2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc ret i8* %tmp @@ -4783,16 +6846,28 @@ declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8> define i8* @test_v8i8_post_imm_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { -;CHECK-LABEL: test_v8i8_post_imm_st1x4: -;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v8i8_post_imm_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st1.8b { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) %tmp = getelementptr i8, i8* %A, i32 32 ret i8* %tmp } define i8* @test_v8i8_post_reg_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i8_post_reg_st1x4: -;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st1.8b { v0, v1, v2, v3 }, [x0], x2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc ret i8* %tmp @@ -4802,16 +6877,29 @@ declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 define i16* @test_v8i16_post_imm_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { -;CHECK-LABEL: test_v8i16_post_imm_st1x4: -;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v8i16_post_imm_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st1.8h { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) %tmp = getelementptr i16, i16* %A, i32 32 ret i16* %tmp } define i16* @test_v8i16_post_reg_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i16_post_reg_st1x4: -;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st1.8h { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc ret i16* %tmp @@ -4821,16 +6909,29 @@ declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16 define i16* @test_v4i16_post_imm_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { -;CHECK-LABEL: test_v4i16_post_imm_st1x4: -;CHECK: st1.4h { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v4i16_post_imm_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st1.4h { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) %tmp = getelementptr i16, i16* %A, i32 16 ret i16* %tmp } define i16* @test_v4i16_post_reg_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i16_post_reg_st1x4: -;CHECK: st1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st1.4h { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc ret i16* %tmp @@ -4840,16 +6941,29 @@ declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16 define i32* @test_v4i32_post_imm_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { -;CHECK-LABEL: test_v4i32_post_imm_st1x4: -;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v4i32_post_imm_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) %tmp = getelementptr i32, i32* %A, i32 16 ret i32* %tmp } define i32* @test_v4i32_post_reg_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i32_post_reg_st1x4: -;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc ret i32* %tmp @@ -4859,16 +6973,29 @@ declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32 define i32* @test_v2i32_post_imm_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { -;CHECK-LABEL: test_v2i32_post_imm_st1x4: -;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v2i32_post_imm_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) %tmp = getelementptr i32, i32* %A, i32 8 ret i32* %tmp } define i32* @test_v2i32_post_reg_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i32_post_reg_st1x4: -;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc ret i32* %tmp @@ -4878,16 +7005,29 @@ declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32 define i64* @test_v2i64_post_imm_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { -;CHECK-LABEL: test_v2i64_post_imm_st1x4: -;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v2i64_post_imm_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) %tmp = getelementptr i64, i64* %A, i64 8 ret i64* %tmp } define i64* @test_v2i64_post_reg_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i64_post_reg_st1x4: -;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc ret i64* %tmp @@ -4897,16 +7037,29 @@ declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64 define i64* @test_v1i64_post_imm_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { -;CHECK-LABEL: test_v1i64_post_imm_st1x4: -;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v1i64_post_imm_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A) %tmp = getelementptr i64, i64* %A, i64 4 ret i64* %tmp } define i64* @test_v1i64_post_reg_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v1i64_post_reg_st1x4: -;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc ret i64* %tmp @@ -4916,16 +7069,29 @@ declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64 define float* @test_v4f32_post_imm_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { -;CHECK-LABEL: test_v4f32_post_imm_st1x4: -;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v4f32_post_imm_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) %tmp = getelementptr float, float* %A, i32 16 ret float* %tmp } define float* @test_v4f32_post_reg_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v4f32_post_reg_st1x4: -;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) %tmp = getelementptr float, float* %A, i64 %inc ret float* %tmp @@ -4935,16 +7101,29 @@ declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x define float* @test_v2f32_post_imm_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { -;CHECK-LABEL: test_v2f32_post_imm_st1x4: -;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v2f32_post_imm_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) %tmp = getelementptr float, float* %A, i32 8 ret float* %tmp } define float* @test_v2f32_post_reg_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f32_post_reg_st1x4: -;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) %tmp = getelementptr float, float* %A, i64 %inc ret float* %tmp @@ -4954,16 +7133,29 @@ declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x define double* @test_v2f64_post_imm_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { -;CHECK-LABEL: test_v2f64_post_imm_st1x4: -;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64 +; CHECK-LABEL: test_v2f64_post_imm_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], #64 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A) %tmp = getelementptr double, double* %A, i64 8 ret double* %tmp } define double* @test_v2f64_post_reg_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f64_post_reg_st1x4: -;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A) %tmp = getelementptr double, double* %A, i64 %inc ret double* %tmp @@ -4973,16 +7165,29 @@ declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 define double* @test_v1f64_post_imm_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { -;CHECK-LABEL: test_v1f64_post_imm_st1x4: -;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-LABEL: test_v1f64_post_imm_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A) %tmp = getelementptr double, double* %A, i64 4 ret double* %tmp } define double* @test_v1f64_post_reg_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v1f64_post_reg_st1x4: -;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_st1x4: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A) %tmp = getelementptr double, double* %A, i64 %inc ret double* %tmp @@ -4990,33 +7195,25 @@ define double* @test_v1f64_post_reg_st1x4(double* %A, double** %ptr, <1 x double declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) - -define i8* @test_v16i8_post_imm_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) { - call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A) - %tmp = getelementptr i8, i8* %A, i32 2 - ret i8* %tmp -} - -define i8* @test_v16i8_post_reg_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) { - call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A) - %tmp = getelementptr i8, i8* %A, i64 %inc - ret i8* %tmp -} - -declare void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i64, i8*) nounwind readnone - - define i8* @test_v16i8_post_imm_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { -;CHECK-LABEL: test_v16i8_post_imm_st2lane: -;CHECK: st2.b { v0, v1 }[0], [x0], #2 +; CHECK-LABEL: test_v16i8_post_imm_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.b { v0, v1 }[0], [x0], #2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i32 2 ret i8* %tmp } define i8* @test_v16i8_post_reg_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v16i8_post_reg_st2lane: -;CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.b { v0, v1 }[0], [x0], x2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc ret i8* %tmp @@ -5026,16 +7223,24 @@ declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8 define i8* @test_v8i8_post_imm_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { -;CHECK-LABEL: test_v8i8_post_imm_st2lane: -;CHECK: st2.b { v0, v1 }[0], [x0], #2 +; CHECK-LABEL: test_v8i8_post_imm_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.b { v0, v1 }[0], [x0], #2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i32 2 ret i8* %tmp } define i8* @test_v8i8_post_reg_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i8_post_reg_st2lane: -;CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.b { v0, v1 }[0], [x0], x2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc ret i8* %tmp @@ -5045,16 +7250,25 @@ declare void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*) define i16* @test_v8i16_post_imm_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { -;CHECK-LABEL: test_v8i16_post_imm_st2lane: -;CHECK: st2.h { v0, v1 }[0], [x0], #4 +; CHECK-LABEL: test_v8i16_post_imm_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.h { v0, v1 }[0], [x0], #4 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i32 2 ret i16* %tmp } define i16* @test_v8i16_post_reg_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i16_post_reg_st2lane: -;CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.h { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc ret i16* %tmp @@ -5064,16 +7278,25 @@ declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i define i16* @test_v4i16_post_imm_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { -;CHECK-LABEL: test_v4i16_post_imm_st2lane: -;CHECK: st2.h { v0, v1 }[0], [x0], #4 +; CHECK-LABEL: test_v4i16_post_imm_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.h { v0, v1 }[0], [x0], #4 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i32 2 ret i16* %tmp } define i16* @test_v4i16_post_reg_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i16_post_reg_st2lane: -;CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.h { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc ret i16* %tmp @@ -5083,16 +7306,25 @@ declare void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i define i32* @test_v4i32_post_imm_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { -;CHECK-LABEL: test_v4i32_post_imm_st2lane: -;CHECK: st2.s { v0, v1 }[0], [x0], #8 +; CHECK-LABEL: test_v4i32_post_imm_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.s { v0, v1 }[0], [x0], #8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i32 2 ret i32* %tmp } define i32* @test_v4i32_post_reg_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i32_post_reg_st2lane: -;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.s { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc ret i32* %tmp @@ -5102,16 +7334,25 @@ declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i define i32* @test_v2i32_post_imm_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { -;CHECK-LABEL: test_v2i32_post_imm_st2lane: -;CHECK: st2.s { v0, v1 }[0], [x0], #8 +; CHECK-LABEL: test_v2i32_post_imm_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.s { v0, v1 }[0], [x0], #8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i32 2 ret i32* %tmp } define i32* @test_v2i32_post_reg_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i32_post_reg_st2lane: -;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.s { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc ret i32* %tmp @@ -5121,16 +7362,25 @@ declare void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i define i64* @test_v2i64_post_imm_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { -;CHECK-LABEL: test_v2i64_post_imm_st2lane: -;CHECK: st2.d { v0, v1 }[0], [x0], #16 +; CHECK-LABEL: test_v2i64_post_imm_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.d { v0, v1 }[0], [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i64 2 ret i64* %tmp } define i64* @test_v2i64_post_reg_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i64_post_reg_st2lane: -;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.d { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc ret i64* %tmp @@ -5140,16 +7390,25 @@ declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i define i64* @test_v1i64_post_imm_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { -;CHECK-LABEL: test_v1i64_post_imm_st2lane: -;CHECK: st2.d { v0, v1 }[0], [x0], #16 +; CHECK-LABEL: test_v1i64_post_imm_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.d { v0, v1 }[0], [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i64 2 ret i64* %tmp } define i64* @test_v1i64_post_reg_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v1i64_post_reg_st2lane: -;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.d { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc ret i64* %tmp @@ -5159,16 +7418,25 @@ declare void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i define float* @test_v4f32_post_imm_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind { -;CHECK-LABEL: test_v4f32_post_imm_st2lane: -;CHECK: st2.s { v0, v1 }[0], [x0], #8 +; CHECK-LABEL: test_v4f32_post_imm_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.s { v0, v1 }[0], [x0], #8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) %tmp = getelementptr float, float* %A, i32 2 ret float* %tmp } define float* @test_v4f32_post_reg_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v4f32_post_reg_st2lane: -;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.s { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) %tmp = getelementptr float, float* %A, i64 %inc ret float* %tmp @@ -5178,16 +7446,25 @@ declare void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float>, <4 x float>, i6 define float* @test_v2f32_post_imm_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind { -;CHECK-LABEL: test_v2f32_post_imm_st2lane: -;CHECK: st2.s { v0, v1 }[0], [x0], #8 +; CHECK-LABEL: test_v2f32_post_imm_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.s { v0, v1 }[0], [x0], #8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) %tmp = getelementptr float, float* %A, i32 2 ret float* %tmp } define float* @test_v2f32_post_reg_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f32_post_reg_st2lane: -;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.s { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) %tmp = getelementptr float, float* %A, i64 %inc ret float* %tmp @@ -5197,16 +7474,25 @@ declare void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float>, <2 x float>, i6 define double* @test_v2f64_post_imm_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind { -;CHECK-LABEL: test_v2f64_post_imm_st2lane: -;CHECK: st2.d { v0, v1 }[0], [x0], #16 +; CHECK-LABEL: test_v2f64_post_imm_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.d { v0, v1 }[0], [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) %tmp = getelementptr double, double* %A, i64 2 ret double* %tmp } define double* @test_v2f64_post_reg_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f64_post_reg_st2lane: -;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.d { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) %tmp = getelementptr double, double* %A, i64 %inc ret double* %tmp @@ -5216,16 +7502,25 @@ declare void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double>, <2 x double>, define double* @test_v1f64_post_imm_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind { -;CHECK-LABEL: test_v1f64_post_imm_st2lane: -;CHECK: st2.d { v0, v1 }[0], [x0], #16 +; CHECK-LABEL: test_v1f64_post_imm_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.d { v0, v1 }[0], [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) %tmp = getelementptr double, double* %A, i64 2 ret double* %tmp } define double* @test_v1f64_post_reg_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind { -;CHECK-LABEL: test_v1f64_post_reg_st2lane: -;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_st2lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: st2.d { v0, v1 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) %tmp = getelementptr double, double* %A, i64 %inc ret double* %tmp @@ -5235,16 +7530,26 @@ declare void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double>, <1 x double>, define i8* @test_v16i8_post_imm_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { -;CHECK-LABEL: test_v16i8_post_imm_st3lane: -;CHECK: st3.b { v0, v1, v2 }[0], [x0], #3 +; CHECK-LABEL: test_v16i8_post_imm_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.b { v0, v1, v2 }[0], [x0], #3 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i32 3 ret i8* %tmp } define i8* @test_v16i8_post_reg_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v16i8_post_reg_st3lane: -;CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.b { v0, v1, v2 }[0], [x0], x2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc ret i8* %tmp @@ -5254,16 +7559,26 @@ declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i define i8* @test_v8i8_post_imm_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { -;CHECK-LABEL: test_v8i8_post_imm_st3lane: -;CHECK: st3.b { v0, v1, v2 }[0], [x0], #3 +; CHECK-LABEL: test_v8i8_post_imm_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.b { v0, v1, v2 }[0], [x0], #3 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i32 3 ret i8* %tmp } define i8* @test_v8i8_post_reg_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i8_post_reg_st3lane: -;CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.b { v0, v1, v2 }[0], [x0], x2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc ret i8* %tmp @@ -5273,16 +7588,27 @@ declare void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, define i16* @test_v8i16_post_imm_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { -;CHECK-LABEL: test_v8i16_post_imm_st3lane: -;CHECK: st3.h { v0, v1, v2 }[0], [x0], #6 +; CHECK-LABEL: test_v8i16_post_imm_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.h { v0, v1, v2 }[0], [x0], #6 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i32 3 ret i16* %tmp } define i16* @test_v8i16_post_reg_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i16_post_reg_st3lane: -;CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.h { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc ret i16* %tmp @@ -5292,16 +7618,27 @@ declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i define i16* @test_v4i16_post_imm_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { -;CHECK-LABEL: test_v4i16_post_imm_st3lane: -;CHECK: st3.h { v0, v1, v2 }[0], [x0], #6 +; CHECK-LABEL: test_v4i16_post_imm_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.h { v0, v1, v2 }[0], [x0], #6 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i32 3 ret i16* %tmp } define i16* @test_v4i16_post_reg_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i16_post_reg_st3lane: -;CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.h { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc ret i16* %tmp @@ -5311,16 +7648,27 @@ declare void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i define i32* @test_v4i32_post_imm_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { -;CHECK-LABEL: test_v4i32_post_imm_st3lane: -;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12 +; CHECK-LABEL: test_v4i32_post_imm_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i32 3 ret i32* %tmp } define i32* @test_v4i32_post_reg_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i32_post_reg_st3lane: -;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc ret i32* %tmp @@ -5330,16 +7678,27 @@ declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i define i32* @test_v2i32_post_imm_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { -;CHECK-LABEL: test_v2i32_post_imm_st3lane: -;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12 +; CHECK-LABEL: test_v2i32_post_imm_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i32 3 ret i32* %tmp } define i32* @test_v2i32_post_reg_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i32_post_reg_st3lane: -;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc ret i32* %tmp @@ -5349,16 +7708,27 @@ declare void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i define i64* @test_v2i64_post_imm_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { -;CHECK-LABEL: test_v2i64_post_imm_st3lane: -;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24 +; CHECK-LABEL: test_v2i64_post_imm_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i64 3 ret i64* %tmp } define i64* @test_v2i64_post_reg_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i64_post_reg_st3lane: -;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc ret i64* %tmp @@ -5368,16 +7738,27 @@ declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i define i64* @test_v1i64_post_imm_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { -;CHECK-LABEL: test_v1i64_post_imm_st3lane: -;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24 +; CHECK-LABEL: test_v1i64_post_imm_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i64 3 ret i64* %tmp } define i64* @test_v1i64_post_reg_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v1i64_post_reg_st3lane: -;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc ret i64* %tmp @@ -5387,16 +7768,27 @@ declare void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i define float* @test_v4f32_post_imm_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { -;CHECK-LABEL: test_v4f32_post_imm_st3lane: -;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12 +; CHECK-LABEL: test_v4f32_post_imm_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) %tmp = getelementptr float, float* %A, i32 3 ret float* %tmp } define float* @test_v4f32_post_reg_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v4f32_post_reg_st3lane: -;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) %tmp = getelementptr float, float* %A, i64 %inc ret float* %tmp @@ -5406,16 +7798,27 @@ declare void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 define float* @test_v2f32_post_imm_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { -;CHECK-LABEL: test_v2f32_post_imm_st3lane: -;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12 +; CHECK-LABEL: test_v2f32_post_imm_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) %tmp = getelementptr float, float* %A, i32 3 ret float* %tmp } define float* @test_v2f32_post_reg_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f32_post_reg_st3lane: -;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) %tmp = getelementptr float, float* %A, i64 %inc ret float* %tmp @@ -5425,16 +7828,27 @@ declare void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 define double* @test_v2f64_post_imm_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { -;CHECK-LABEL: test_v2f64_post_imm_st3lane: -;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24 +; CHECK-LABEL: test_v2f64_post_imm_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) %tmp = getelementptr double, double* %A, i64 3 ret double* %tmp } define double* @test_v2f64_post_reg_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f64_post_reg_st3lane: -;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) %tmp = getelementptr double, double* %A, i64 %inc ret double* %tmp @@ -5444,16 +7858,27 @@ declare void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double>, <2 x double>, define double* @test_v1f64_post_imm_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { -;CHECK-LABEL: test_v1f64_post_imm_st3lane: -;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24 +; CHECK-LABEL: test_v1f64_post_imm_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) %tmp = getelementptr double, double* %A, i64 3 ret double* %tmp } define double* @test_v1f64_post_reg_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { -;CHECK-LABEL: test_v1f64_post_reg_st3lane: -;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_st3lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) %tmp = getelementptr double, double* %A, i64 %inc ret double* %tmp @@ -5463,16 +7888,28 @@ declare void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double>, <1 x double>, define i8* @test_v16i8_post_imm_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { -;CHECK-LABEL: test_v16i8_post_imm_st4lane: -;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4 +; CHECK-LABEL: test_v16i8_post_imm_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], #4 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i32 4 ret i8* %tmp } define i8* @test_v16i8_post_reg_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v16i8_post_reg_st4lane: -;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v16i8_post_reg_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], x2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc ret i8* %tmp @@ -5482,16 +7919,28 @@ declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i define i8* @test_v8i8_post_imm_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { -;CHECK-LABEL: test_v8i8_post_imm_st4lane: -;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4 +; CHECK-LABEL: test_v8i8_post_imm_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], #4 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i32 4 ret i8* %tmp } define i8* @test_v8i8_post_reg_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i8_post_reg_st4lane: -;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i8_post_reg_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], x2 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8, i8* %A, i64 %inc ret i8* %tmp @@ -5501,16 +7950,29 @@ declare void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, define i16* @test_v8i16_post_imm_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { -;CHECK-LABEL: test_v8i16_post_imm_st4lane: -;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8 +; CHECK-LABEL: test_v8i16_post_imm_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], #8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i32 4 ret i16* %tmp } define i16* @test_v8i16_post_reg_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v8i16_post_reg_st4lane: -;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v8i16_post_reg_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc ret i16* %tmp @@ -5520,16 +7982,29 @@ declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i define i16* @test_v4i16_post_imm_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { -;CHECK-LABEL: test_v4i16_post_imm_st4lane: -;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8 +; CHECK-LABEL: test_v4i16_post_imm_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], #8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i32 4 ret i16* %tmp } define i16* @test_v4i16_post_reg_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i16_post_reg_st4lane: -;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i16_post_reg_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16, i16* %A, i64 %inc ret i16* %tmp @@ -5539,16 +8014,29 @@ declare void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i define i32* @test_v4i32_post_imm_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { -;CHECK-LABEL: test_v4i32_post_imm_st4lane: -;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16 +; CHECK-LABEL: test_v4i32_post_imm_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i32 4 ret i32* %tmp } define i32* @test_v4i32_post_reg_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v4i32_post_reg_st4lane: -;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4i32_post_reg_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc ret i32* %tmp @@ -5558,16 +8046,29 @@ declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i define i32* @test_v2i32_post_imm_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { -;CHECK-LABEL: test_v2i32_post_imm_st4lane: -;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16 +; CHECK-LABEL: test_v2i32_post_imm_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i32 4 ret i32* %tmp } define i32* @test_v2i32_post_reg_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i32_post_reg_st4lane: -;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i32_post_reg_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32, i32* %A, i64 %inc ret i32* %tmp @@ -5577,16 +8078,29 @@ declare void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i define i64* @test_v2i64_post_imm_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { -;CHECK-LABEL: test_v2i64_post_imm_st4lane: -;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32 +; CHECK-LABEL: test_v2i64_post_imm_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i64 4 ret i64* %tmp } define i64* @test_v2i64_post_reg_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v2i64_post_reg_st4lane: -;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2i64_post_reg_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc ret i64* %tmp @@ -5596,16 +8110,29 @@ declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i define i64* @test_v1i64_post_imm_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { -;CHECK-LABEL: test_v1i64_post_imm_st4lane: -;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32 +; CHECK-LABEL: test_v1i64_post_imm_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i64 4 ret i64* %tmp } define i64* @test_v1i64_post_reg_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v1i64_post_reg_st4lane: -;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1i64_post_reg_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64, i64* %A, i64 %inc ret i64* %tmp @@ -5615,16 +8142,29 @@ declare void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i define float* @test_v4f32_post_imm_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { -;CHECK-LABEL: test_v4f32_post_imm_st4lane: -;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16 +; CHECK-LABEL: test_v4f32_post_imm_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) %tmp = getelementptr float, float* %A, i32 4 ret float* %tmp } define float* @test_v4f32_post_reg_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v4f32_post_reg_st4lane: -;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v4f32_post_reg_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) %tmp = getelementptr float, float* %A, i64 %inc ret float* %tmp @@ -5634,16 +8174,29 @@ declare void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 define float* @test_v2f32_post_imm_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { -;CHECK-LABEL: test_v2f32_post_imm_st4lane: -;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16 +; CHECK-LABEL: test_v2f32_post_imm_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) %tmp = getelementptr float, float* %A, i32 4 ret float* %tmp } define float* @test_v2f32_post_reg_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f32_post_reg_st4lane: -;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f32_post_reg_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) %tmp = getelementptr float, float* %A, i64 %inc ret float* %tmp @@ -5653,16 +8206,29 @@ declare void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 define double* @test_v2f64_post_imm_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { -;CHECK-LABEL: test_v2f64_post_imm_st4lane: -;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32 +; CHECK-LABEL: test_v2f64_post_imm_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) %tmp = getelementptr double, double* %A, i64 4 ret double* %tmp } define double* @test_v2f64_post_reg_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v2f64_post_reg_st4lane: -;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v2f64_post_reg_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) %tmp = getelementptr double, double* %A, i64 %inc ret double* %tmp @@ -5672,16 +8238,29 @@ declare void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double>, <2 x double>, define double* @test_v1f64_post_imm_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { -;CHECK-LABEL: test_v1f64_post_imm_st4lane: -;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32 +; CHECK-LABEL: test_v1f64_post_imm_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) %tmp = getelementptr double, double* %A, i64 4 ret double* %tmp } define double* @test_v1f64_post_reg_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { -;CHECK-LABEL: test_v1f64_post_reg_st4lane: -;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} +; CHECK-LABEL: test_v1f64_post_reg_st4lane: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 +; CHECK-NEXT: ret call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) %tmp = getelementptr double, double* %A, i64 %inc ret double* %tmp @@ -5691,7 +8270,10 @@ declare void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double>, <1 x double>, define <16 x i8> @test_v16i8_post_imm_ld1r(i8* %bar, i8** %ptr) { ; CHECK-LABEL: test_v16i8_post_imm_ld1r: -; CHECK: ld1r.16b { v0 }, [x0], #1 +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1r.16b { v0 }, [x0], #1 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i8, i8* %bar %tmp2 = insertelement <16 x i8> , i8 %tmp1, i32 0 %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1 @@ -5716,7 +8298,10 @@ define <16 x i8> @test_v16i8_post_imm_ld1r(i8* %bar, i8** %ptr) { define <16 x i8> @test_v16i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) { ; CHECK-LABEL: test_v16i8_post_reg_ld1r: -; CHECK: ld1r.16b { v0 }, [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1r.16b { v0 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i8, i8* %bar %tmp2 = insertelement <16 x i8> , i8 %tmp1, i32 0 %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1 @@ -5741,7 +8326,10 @@ define <16 x i8> @test_v16i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) { define <8 x i8> @test_v8i8_post_imm_ld1r(i8* %bar, i8** %ptr) { ; CHECK-LABEL: test_v8i8_post_imm_ld1r: -; CHECK: ld1r.8b { v0 }, [x0], #1 +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1r.8b { v0 }, [x0], #1 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i8, i8* %bar %tmp2 = insertelement <8 x i8> , i8 %tmp1, i32 0 %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1 @@ -5758,7 +8346,10 @@ define <8 x i8> @test_v8i8_post_imm_ld1r(i8* %bar, i8** %ptr) { define <8 x i8> @test_v8i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) { ; CHECK-LABEL: test_v8i8_post_reg_ld1r: -; CHECK: ld1r.8b { v0 }, [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1r.8b { v0 }, [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i8, i8* %bar %tmp2 = insertelement <8 x i8> , i8 %tmp1, i32 0 %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1 @@ -5775,7 +8366,10 @@ define <8 x i8> @test_v8i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) { define <8 x i16> @test_v8i16_post_imm_ld1r(i16* %bar, i16** %ptr) { ; CHECK-LABEL: test_v8i16_post_imm_ld1r: -; CHECK: ld1r.8h { v0 }, [x0], #2 +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1r.8h { v0 }, [x0], #2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i16, i16* %bar %tmp2 = insertelement <8 x i16> , i16 %tmp1, i32 0 %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1 @@ -5792,7 +8386,11 @@ define <8 x i16> @test_v8i16_post_imm_ld1r(i16* %bar, i16** %ptr) { define <8 x i16> @test_v8i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) { ; CHECK-LABEL: test_v8i16_post_reg_ld1r: -; CHECK: ld1r.8h { v0 }, [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld1r.8h { v0 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i16, i16* %bar %tmp2 = insertelement <8 x i16> , i16 %tmp1, i32 0 %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1 @@ -5809,7 +8407,10 @@ define <8 x i16> @test_v8i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) { define <4 x i16> @test_v4i16_post_imm_ld1r(i16* %bar, i16** %ptr) { ; CHECK-LABEL: test_v4i16_post_imm_ld1r: -; CHECK: ld1r.4h { v0 }, [x0], #2 +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1r.4h { v0 }, [x0], #2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i16, i16* %bar %tmp2 = insertelement <4 x i16> , i16 %tmp1, i32 0 %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1 @@ -5822,7 +8423,11 @@ define <4 x i16> @test_v4i16_post_imm_ld1r(i16* %bar, i16** %ptr) { define <4 x i16> @test_v4i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) { ; CHECK-LABEL: test_v4i16_post_reg_ld1r: -; CHECK: ld1r.4h { v0 }, [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld1r.4h { v0 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i16, i16* %bar %tmp2 = insertelement <4 x i16> , i16 %tmp1, i32 0 %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1 @@ -5835,7 +8440,10 @@ define <4 x i16> @test_v4i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) { define <4 x i32> @test_v4i32_post_imm_ld1r(i32* %bar, i32** %ptr) { ; CHECK-LABEL: test_v4i32_post_imm_ld1r: -; CHECK: ld1r.4s { v0 }, [x0], #4 +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1r.4s { v0 }, [x0], #4 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i32, i32* %bar %tmp2 = insertelement <4 x i32> , i32 %tmp1, i32 0 %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1 @@ -5848,7 +8456,11 @@ define <4 x i32> @test_v4i32_post_imm_ld1r(i32* %bar, i32** %ptr) { define <4 x i32> @test_v4i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) { ; CHECK-LABEL: test_v4i32_post_reg_ld1r: -; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1r.4s { v0 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i32, i32* %bar %tmp2 = insertelement <4 x i32> , i32 %tmp1, i32 0 %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1 @@ -5861,7 +8473,10 @@ define <4 x i32> @test_v4i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) { define <2 x i32> @test_v2i32_post_imm_ld1r(i32* %bar, i32** %ptr) { ; CHECK-LABEL: test_v2i32_post_imm_ld1r: -; CHECK: ld1r.2s { v0 }, [x0], #4 +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1r.2s { v0 }, [x0], #4 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i32, i32* %bar %tmp2 = insertelement <2 x i32> , i32 %tmp1, i32 0 %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1 @@ -5872,7 +8487,11 @@ define <2 x i32> @test_v2i32_post_imm_ld1r(i32* %bar, i32** %ptr) { define <2 x i32> @test_v2i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) { ; CHECK-LABEL: test_v2i32_post_reg_ld1r: -; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1r.2s { v0 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i32, i32* %bar %tmp2 = insertelement <2 x i32> , i32 %tmp1, i32 0 %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1 @@ -5883,7 +8502,10 @@ define <2 x i32> @test_v2i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) { define <2 x i64> @test_v2i64_post_imm_ld1r(i64* %bar, i64** %ptr) { ; CHECK-LABEL: test_v2i64_post_imm_ld1r: -; CHECK: ld1r.2d { v0 }, [x0], #8 +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1r.2d { v0 }, [x0], #8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i64, i64* %bar %tmp2 = insertelement <2 x i64> , i64 %tmp1, i32 0 %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1 @@ -5894,7 +8516,11 @@ define <2 x i64> @test_v2i64_post_imm_ld1r(i64* %bar, i64** %ptr) { define <2 x i64> @test_v2i64_post_reg_ld1r(i64* %bar, i64** %ptr, i64 %inc) { ; CHECK-LABEL: test_v2i64_post_reg_ld1r: -; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1r.2d { v0 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i64, i64* %bar %tmp2 = insertelement <2 x i64> , i64 %tmp1, i32 0 %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1 @@ -5905,7 +8531,10 @@ define <2 x i64> @test_v2i64_post_reg_ld1r(i64* %bar, i64** %ptr, i64 %inc) { define <4 x float> @test_v4f32_post_imm_ld1r(float* %bar, float** %ptr) { ; CHECK-LABEL: test_v4f32_post_imm_ld1r: -; CHECK: ld1r.4s { v0 }, [x0], #4 +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1r.4s { v0 }, [x0], #4 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load float, float* %bar %tmp2 = insertelement <4 x float> , float %tmp1, i32 0 %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1 @@ -5918,7 +8547,11 @@ define <4 x float> @test_v4f32_post_imm_ld1r(float* %bar, float** %ptr) { define <4 x float> @test_v4f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) { ; CHECK-LABEL: test_v4f32_post_reg_ld1r: -; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1r.4s { v0 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load float, float* %bar %tmp2 = insertelement <4 x float> , float %tmp1, i32 0 %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1 @@ -5931,7 +8564,10 @@ define <4 x float> @test_v4f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc define <2 x float> @test_v2f32_post_imm_ld1r(float* %bar, float** %ptr) { ; CHECK-LABEL: test_v2f32_post_imm_ld1r: -; CHECK: ld1r.2s { v0 }, [x0], #4 +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1r.2s { v0 }, [x0], #4 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load float, float* %bar %tmp2 = insertelement <2 x float> , float %tmp1, i32 0 %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1 @@ -5942,7 +8578,11 @@ define <2 x float> @test_v2f32_post_imm_ld1r(float* %bar, float** %ptr) { define <2 x float> @test_v2f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) { ; CHECK-LABEL: test_v2f32_post_reg_ld1r: -; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1r.2s { v0 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load float, float* %bar %tmp2 = insertelement <2 x float> , float %tmp1, i32 0 %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1 @@ -5953,7 +8593,10 @@ define <2 x float> @test_v2f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc define <2 x double> @test_v2f64_post_imm_ld1r(double* %bar, double** %ptr) { ; CHECK-LABEL: test_v2f64_post_imm_ld1r: -; CHECK: ld1r.2d { v0 }, [x0], #8 +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1r.2d { v0 }, [x0], #8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load double, double* %bar %tmp2 = insertelement <2 x double> , double %tmp1, i32 0 %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1 @@ -5964,7 +8607,11 @@ define <2 x double> @test_v2f64_post_imm_ld1r(double* %bar, double** %ptr) { define <2 x double> @test_v2f64_post_reg_ld1r(double* %bar, double** %ptr, i64 %inc) { ; CHECK-LABEL: test_v2f64_post_reg_ld1r: -; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1r.2d { v0 }, [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load double, double* %bar %tmp2 = insertelement <2 x double> , double %tmp1, i32 0 %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1 @@ -5975,7 +8622,10 @@ define <2 x double> @test_v2f64_post_reg_ld1r(double* %bar, double** %ptr, i64 % define <16 x i8> @test_v16i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <16 x i8> %A) { ; CHECK-LABEL: test_v16i8_post_imm_ld1lane: -; CHECK: ld1.b { v0 }[1], [x0], #1 +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.b { v0 }[1], [x0], #1 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i8, i8* %bar %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1 %tmp3 = getelementptr i8, i8* %bar, i64 1 @@ -5985,7 +8635,10 @@ define <16 x i8> @test_v16i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <16 x i8> %A) define <16 x i8> @test_v16i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <16 x i8> %A) { ; CHECK-LABEL: test_v16i8_post_reg_ld1lane: -; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.b { v0 }[1], [x0], x2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i8, i8* %bar %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1 %tmp3 = getelementptr i8, i8* %bar, i64 %inc @@ -5995,7 +8648,12 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <16 define <8 x i8> @test_v8i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <8 x i8> %A) { ; CHECK-LABEL: test_v8i8_post_imm_ld1lane: -; CHECK: ld1.b { v0 }[1], [x0], #1 +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ld1.b { v0 }[1], [x0], #1 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i8, i8* %bar %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1 %tmp3 = getelementptr i8, i8* %bar, i64 1 @@ -6005,7 +8663,12 @@ define <8 x i8> @test_v8i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <8 x i8> %A) { define <8 x i8> @test_v8i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <8 x i8> %A) { ; CHECK-LABEL: test_v8i8_post_reg_ld1lane: -; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ld1.b { v0 }[1], [x0], x2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i8, i8* %bar %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1 %tmp3 = getelementptr i8, i8* %bar, i64 %inc @@ -6015,7 +8678,10 @@ define <8 x i8> @test_v8i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <8 x define <8 x i16> @test_v8i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <8 x i16> %A) { ; CHECK-LABEL: test_v8i16_post_imm_ld1lane: -; CHECK: ld1.h { v0 }[1], [x0], #2 +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.h { v0 }[1], [x0], #2 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i16, i16* %bar %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1 %tmp3 = getelementptr i16, i16* %bar, i64 1 @@ -6025,7 +8691,11 @@ define <8 x i16> @test_v8i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <8 x i16> % define <8 x i16> @test_v8i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <8 x i16> %A) { ; CHECK-LABEL: test_v8i16_post_reg_ld1lane: -; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld1.h { v0 }[1], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i16, i16* %bar %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1 %tmp3 = getelementptr i16, i16* %bar, i64 %inc @@ -6035,7 +8705,12 @@ define <8 x i16> @test_v8i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, < define <4 x i16> @test_v4i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <4 x i16> %A) { ; CHECK-LABEL: test_v4i16_post_imm_ld1lane: -; CHECK: ld1.h { v0 }[1], [x0], #2 +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ld1.h { v0 }[1], [x0], #2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i16, i16* %bar %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1 %tmp3 = getelementptr i16, i16* %bar, i64 1 @@ -6045,7 +8720,13 @@ define <4 x i16> @test_v4i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <4 x i16> % define <4 x i16> @test_v4i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A) { ; CHECK-LABEL: test_v4i16_post_reg_ld1lane: -; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld1.h { v0 }[1], [x0], x8 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i16, i16* %bar %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1 %tmp3 = getelementptr i16, i16* %bar, i64 %inc @@ -6055,7 +8736,10 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, < define <4 x i32> @test_v4i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <4 x i32> %A) { ; CHECK-LABEL: test_v4i32_post_imm_ld1lane: -; CHECK: ld1.s { v0 }[1], [x0], #4 +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.s { v0 }[1], [x0], #4 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i32, i32* %bar %tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1 %tmp3 = getelementptr i32, i32* %bar, i64 1 @@ -6065,7 +8749,11 @@ define <4 x i32> @test_v4i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <4 x i32> % define <4 x i32> @test_v4i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <4 x i32> %A) { ; CHECK-LABEL: test_v4i32_post_reg_ld1lane: -; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1.s { v0 }[1], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i32, i32* %bar %tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1 %tmp3 = getelementptr i32, i32* %bar, i64 %inc @@ -6075,7 +8763,12 @@ define <4 x i32> @test_v4i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, < define <2 x i32> @test_v2i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <2 x i32> %A) { ; CHECK-LABEL: test_v2i32_post_imm_ld1lane: -; CHECK: ld1.s { v0 }[1], [x0], #4 +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ld1.s { v0 }[1], [x0], #4 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i32, i32* %bar %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1 %tmp3 = getelementptr i32, i32* %bar, i64 1 @@ -6085,7 +8778,13 @@ define <2 x i32> @test_v2i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <2 x i32> % define <2 x i32> @test_v2i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <2 x i32> %A) { ; CHECK-LABEL: test_v2i32_post_reg_ld1lane: -; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1.s { v0 }[1], [x0], x8 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i32, i32* %bar %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1 %tmp3 = getelementptr i32, i32* %bar, i64 %inc @@ -6095,7 +8794,10 @@ define <2 x i32> @test_v2i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, < define <2 x i64> @test_v2i64_post_imm_ld1lane(i64* %bar, i64** %ptr, <2 x i64> %A) { ; CHECK-LABEL: test_v2i64_post_imm_ld1lane: -; CHECK: ld1.d { v0 }[1], [x0], #8 +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.d { v0 }[1], [x0], #8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i64, i64* %bar %tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1 %tmp3 = getelementptr i64, i64* %bar, i64 1 @@ -6105,7 +8807,11 @@ define <2 x i64> @test_v2i64_post_imm_ld1lane(i64* %bar, i64** %ptr, <2 x i64> % define <2 x i64> @test_v2i64_post_reg_ld1lane(i64* %bar, i64** %ptr, i64 %inc, <2 x i64> %A) { ; CHECK-LABEL: test_v2i64_post_reg_ld1lane: -; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.d { v0 }[1], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load i64, i64* %bar %tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1 %tmp3 = getelementptr i64, i64* %bar, i64 %inc @@ -6115,7 +8821,10 @@ define <2 x i64> @test_v2i64_post_reg_ld1lane(i64* %bar, i64** %ptr, i64 %inc, < define <4 x float> @test_v4f32_post_imm_ld1lane(float* %bar, float** %ptr, <4 x float> %A) { ; CHECK-LABEL: test_v4f32_post_imm_ld1lane: -; CHECK: ld1.s { v0 }[1], [x0], #4 +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.s { v0 }[1], [x0], #4 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load float, float* %bar %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1 %tmp3 = getelementptr float, float* %bar, i64 1 @@ -6125,7 +8834,11 @@ define <4 x float> @test_v4f32_post_imm_ld1lane(float* %bar, float** %ptr, <4 x define <4 x float> @test_v4f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <4 x float> %A) { ; CHECK-LABEL: test_v4f32_post_reg_ld1lane: -; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1.s { v0 }[1], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load float, float* %bar %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1 %tmp3 = getelementptr float, float* %bar, i64 %inc @@ -6135,7 +8848,12 @@ define <4 x float> @test_v4f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 % define <2 x float> @test_v2f32_post_imm_ld1lane(float* %bar, float** %ptr, <2 x float> %A) { ; CHECK-LABEL: test_v2f32_post_imm_ld1lane: -; CHECK: ld1.s { v0 }[1], [x0], #4 +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ld1.s { v0 }[1], [x0], #4 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load float, float* %bar %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1 %tmp3 = getelementptr float, float* %bar, i64 1 @@ -6145,7 +8863,13 @@ define <2 x float> @test_v2f32_post_imm_ld1lane(float* %bar, float** %ptr, <2 x define <2 x float> @test_v2f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <2 x float> %A) { ; CHECK-LABEL: test_v2f32_post_reg_ld1lane: -; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: lsl x8, x2, #2 +; CHECK-NEXT: ld1.s { v0 }[1], [x0], x8 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load float, float* %bar %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1 %tmp3 = getelementptr float, float* %bar, i64 %inc @@ -6155,7 +8879,10 @@ define <2 x float> @test_v2f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 % define <2 x double> @test_v2f64_post_imm_ld1lane(double* %bar, double** %ptr, <2 x double> %A) { ; CHECK-LABEL: test_v2f64_post_imm_ld1lane: -; CHECK: ld1.d { v0 }[1], [x0], #8 +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.d { v0 }[1], [x0], #8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load double, double* %bar %tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1 %tmp3 = getelementptr double, double* %bar, i64 1 @@ -6165,7 +8892,11 @@ define <2 x double> @test_v2f64_post_imm_ld1lane(double* %bar, double** %ptr, <2 define <2 x double> @test_v2f64_post_reg_ld1lane(double* %bar, double** %ptr, i64 %inc, <2 x double> %A) { ; CHECK-LABEL: test_v2f64_post_reg_ld1lane: -; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: lsl x8, x2, #3 +; CHECK-NEXT: ld1.d { v0 }[1], [x0], x8 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ret %tmp1 = load double, double* %bar %tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1 %tmp3 = getelementptr double, double* %bar, i64 %inc @@ -6176,14 +8907,14 @@ define <2 x double> @test_v2f64_post_reg_ld1lane(double* %bar, double** %ptr, i6 ; Check for dependencies between the vector and the scalar load. define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(float* %bar, float** %ptr, i64 %inc, <4 x float>* %dep_ptr_1, <4 x float>* %dep_ptr_2, <4 x float> %vec) { ; CHECK-LABEL: test_v4f32_post_reg_ld1lane_dep_vec_on_load: -; CHECK: %bb.0: -; CHECK-NEXT: ldr s[[LD:[0-9]+]], [x0] -; CHECK-NEXT: str q0, [x3] -; CHECK-NEXT: ldr q0, [x4] -; CHECK-NEXT: mov.s v0[1], v[[LD]][0] -; CHECK-NEXT: add [[POST:x[0-9]]], x0, x2, lsl #2 -; CHECK-NEXT: str [[POST]], [x1] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr s1, [x0] +; CHECK-NEXT: str q0, [x3] +; CHECK-NEXT: ldr q0, [x4] +; CHECK-NEXT: add x8, x0, x2, lsl #2 +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: mov.s v0[1], v1[0] +; CHECK-NEXT: ret %tmp1 = load float, float* %bar store <4 x float> %vec, <4 x float>* %dep_ptr_1, align 16 %A = load <4 x float>, <4 x float>* %dep_ptr_2, align 16 @@ -6202,7 +8933,18 @@ define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(float* %bar, flo ; PR23265 define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A, <2 x i32>* %d) { ; CHECK-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow: -; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}} +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: lsl x8, x2, #1 +; CHECK-NEXT: ld1.h { v0 }[1], [x0], x8 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: str x0, [x1] +; CHECK-NEXT: ldr d1, [x3] +; CHECK-NEXT: cnt.8b v1, v1 +; CHECK-NEXT: uaddlp.4h v1, v1 +; CHECK-NEXT: uaddlp.2s v1, v1 +; CHECK-NEXT: str d1, [x3] +; CHECK-NEXT: ret %tmp1 = load i16, i16* %bar %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1 %tmp3 = getelementptr i16, i16* %bar, i64 %inc @@ -6215,15 +8957,16 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(i16* %bar, i16** %pt declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) -; CHECK-LABEL: test_ld1lane_build: -; CHECK-DAG: ldr s[[REGNUM0:[0-9]+]], [x0] -; CHECK-DAG: ld1.s { v[[REGNUM0:[0-9]+]] }[1], [x1] -; CHECK-DAG: ldr s[[REGNUM1:[0-9]+]], [x2] -; CHECK-DAG: ld1.s { v[[REGNUM1:[0-9]+]] }[1], [x3] -; CHECK: sub.2s v[[REGNUM2:[0-9]+]], v[[REGNUM0]], v[[REGNUM1]] -; CHECK-NEXT: str d[[REGNUM2]], [x4] -; CHECK-NEXT: ret define void @test_ld1lane_build(i32* %ptr0, i32* %ptr1, i32* %ptr2, i32* %ptr3, <2 x i32>* %out) { +; CHECK-LABEL: test_ld1lane_build: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ldr s1, [x2] +; CHECK-NEXT: ld1.s { v0 }[1], [x1] +; CHECK-NEXT: ld1.s { v1 }[1], [x3] +; CHECK-NEXT: sub.2s v0, v0, v1 +; CHECK-NEXT: str d0, [x4] +; CHECK-NEXT: ret %load0 = load i32, i32* %ptr0, align 4 %load1 = load i32, i32* %ptr1, align 4 %vec0_0 = insertelement <2 x i32> undef, i32 %load0, i32 0 @@ -6239,15 +8982,16 @@ define void @test_ld1lane_build(i32* %ptr0, i32* %ptr1, i32* %ptr2, i32* %ptr3, ret void } -; CHECK-LABEL: test_ld1lane_build_i16: -; CHECK-DAG: ldr h[[REGNUM1:[0-9]+]], [x0] -; CHECK-DAG: ld1.h { v[[REGNUM1]] }[1], [x1] -; CHECK-DAG: ld1.h { v[[REGNUM1]] }[2], [x2] -; CHECK-DAG: ld1.h { v[[REGNUM1]] }[3], [x3] -; CHECK: sub.4h v[[REGNUM2:[0-9]+]], v[[REGNUM1]], v0 -; CHECK-NEXT: str d[[REGNUM2]], [x4] -; CHECK-NEXT: ret define void @test_ld1lane_build_i16(i16* %a, i16* %b, i16* %c, i16* %d, <4 x i16> %e, <4 x i16>* %p) { +; CHECK-LABEL: test_ld1lane_build_i16: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr h1, [x0] +; CHECK-NEXT: ld1.h { v1 }[1], [x1] +; CHECK-NEXT: ld1.h { v1 }[2], [x2] +; CHECK-NEXT: ld1.h { v1 }[3], [x3] +; CHECK-NEXT: sub.4h v0, v1, v0 +; CHECK-NEXT: str d0, [x4] +; CHECK-NEXT: ret %ld.a = load i16, i16* %a %ld.b = load i16, i16* %b %ld.c = load i16, i16* %c @@ -6261,18 +9005,19 @@ define void @test_ld1lane_build_i16(i16* %a, i16* %b, i16* %c, i16* %d, <4 x i1 ret void } -; CHECK-LABEL: test_ld1lane_build_half: -; CHECK-DAG: ldr h[[REGNUM1:[0-9]+]], [x0] -; CHECK-DAG: ld1.h { v[[REGNUM1]] }[1], [x1] -; CHECK-DAG: ld1.h { v[[REGNUM1]] }[2], [x2] -; CHECK-DAG: ld1.h { v[[REGNUM1]] }[3], [x3] -; CHECK-DAG: fcvtl v[[REGNUM01:[0-9]+]].4s, v0.4h -; CHECK-DAG: fcvtl v[[REGNUM11:[0-9]+]].4s, v[[REGNUM1]].4h -; CHECK: fsub.4s v[[REGNUM2:[0-9]+]], v[[REGNUM11]], v[[REGNUM01]] -; CHECK-DAG: fcvtn v[[REGNUM3:[0-9]+]].4h, v[[REGNUM2]].4s -; CHECK-NEXT: str d[[REGNUM2]], [x4] -; CHECK-NEXT: ret define void @test_ld1lane_build_half(half* %a, half* %b, half* %c, half* %d, <4 x half> %e, <4 x half>* %p) { +; CHECK-LABEL: test_ld1lane_build_half: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr h1, [x0] +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: ld1.h { v1 }[1], [x1] +; CHECK-NEXT: ld1.h { v1 }[2], [x2] +; CHECK-NEXT: ld1.h { v1 }[3], [x3] +; CHECK-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NEXT: fsub.4s v0, v1, v0 +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: str d0, [x4] +; CHECK-NEXT: ret %ld.a = load half, half* %a %ld.b = load half, half* %b %ld.c = load half, half* %c @@ -6286,19 +9031,21 @@ define void @test_ld1lane_build_half(half* %a, half* %b, half* %c, half* %d, <4 ret void } -; CHECK-LABEL: test_ld1lane_build_i8: -; CHECK-DAG: ldr b[[REGNUM1:[0-9]+]], [x0] -; CHECK-DAG: ld1.b { v[[REGNUM1]] }[1], [x1] -; CHECK-DAG: ld1.b { v[[REGNUM1]] }[2], [x2] -; CHECK-DAG: ld1.b { v[[REGNUM1]] }[3], [x3] -; CHECK-DAG: ld1.b { v[[REGNUM1]] }[4], [x4] -; CHECK-DAG: ld1.b { v[[REGNUM1]] }[5], [x5] -; CHECK-DAG: ld1.b { v[[REGNUM1]] }[6], [x6] -; CHECK-DAG: ld1.b { v[[REGNUM1]] }[7], [x7] -; CHECK: sub.8b v[[REGNUM2:[0-9]+]], v[[REGNUM1]], v0 -; CHECK-NEXT: str d[[REGNUM2]], [x -; CHECK-NEXT: ret define void @test_ld1lane_build_i8(i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, i8* %f, i8* %g, i8* %h, <8 x i8> %v, <8 x i8>* %p) { +; CHECK-LABEL: test_ld1lane_build_i8: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr b1, [x0] +; CHECK-NEXT: ldr x8, [sp] +; CHECK-NEXT: ld1.b { v1 }[1], [x1] +; CHECK-NEXT: ld1.b { v1 }[2], [x2] +; CHECK-NEXT: ld1.b { v1 }[3], [x3] +; CHECK-NEXT: ld1.b { v1 }[4], [x4] +; CHECK-NEXT: ld1.b { v1 }[5], [x5] +; CHECK-NEXT: ld1.b { v1 }[6], [x6] +; CHECK-NEXT: ld1.b { v1 }[7], [x7] +; CHECK-NEXT: sub.8b v0, v1, v0 +; CHECK-NEXT: str d0, [x8] +; CHECK-NEXT: ret %ld.a = load i8, i8* %a %ld.b = load i8, i8* %b %ld.c = load i8, i8* %c @@ -6322,8 +9069,13 @@ define void @test_ld1lane_build_i8(i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, i8* define <4 x i32> @test_inc_cycle(<4 x i32> %vec, i32* %in) { ; CHECK-LABEL: test_inc_cycle: -; CHECK: ld1.s { v0 }[0], [x0]{{$}} - +; CHECK: ; %bb.0: +; CHECK-NEXT: ld1.s { v0 }[0], [x0] +; CHECK-NEXT: adrp x8, _var@PAGE +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: add x9, x0, x9, lsl #2 +; CHECK-NEXT: str x9, [x8, _var@PAGEOFF] +; CHECK-NEXT: ret %elt = load i32, i32* %in %newvec = insertelement <4 x i32> %vec, i32 %elt, i32 0 @@ -6340,95 +9092,106 @@ define <4 x i32> @test_inc_cycle(<4 x i32> %vec, i32* %in) { @var = global i32* null define i8 @load_single_extract_variable_index_i8(<16 x i8>* %A, i32 %idx) { -; CHECK-LABEL: load_single_extract_variable_index_i8 -; CHECK: ldr [[VEC:.*]], [x0] -; CHECK-NEXT: mov [[SP_ADDR:.*]], sp -; CHECK-NEXT: str [[VEC]], [sp] -; CHECK-NEXT: bfxil [[SP_ADDR]], x1, #0, #4 -; CHECK-NEXT: ldrb w0, [{{ *}}[[SP_ADDR]]{{ *}}] -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ret -; +; CHECK-LABEL: load_single_extract_variable_index_i8: +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: bfxil x8, x1, #0, #4 +; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: ldrb w0, [x8] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret %lv = load <16 x i8>, <16 x i8>* %A %e = extractelement <16 x i8> %lv, i32 %idx ret i8 %e } define i16 @load_single_extract_variable_index_i16(<8 x i16>* %A, i32 %idx) { -; CHECK-LABEL: load_single_extract_variable_index_i16 -; CHECK: ldr [[VEC:.*]], [x0] -; CHECK-NEXT: and [[IDX:.*]], x1, #0x7 -; CHECK-NEXT: mov [[SP_ADDR:.*]], sp -; CHECK-NEXT: str [[VEC]], [sp] -; CHECK-NEXT: bfi [[SP_ADDR]], [[IDX]], #1, #3 -; CHECK-NEXT: ldrh w0, [{{ *}}[[SP_ADDR]]{{ *}}] -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ret -; +; CHECK-LABEL: load_single_extract_variable_index_i16: +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: and x8, x1, #0x7 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: bfi x9, x8, #1, #3 +; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: ldrh w0, [x9] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret %lv = load <8 x i16>, <8 x i16>* %A %e = extractelement <8 x i16> %lv, i32 %idx ret i16 %e } define i32 @load_single_extract_variable_index_i32(<4 x i32>* %A, i32 %idx) { -; CHECK-LABEL: load_single_extract_variable_index_i32 -; CHECK: and [[IDX:.*]], x1, #0x3 -; CHECK-NEXT: ldr w0, [x0, [[IDX]], lsl #2] -; CHECK-NEXT: ret -; +; CHECK-LABEL: load_single_extract_variable_index_i32: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: and x8, x1, #0x3 +; CHECK-NEXT: ldr w0, [x0, x8, lsl #2] +; CHECK-NEXT: ret %lv = load <4 x i32>, <4 x i32>* %A %e = extractelement <4 x i32> %lv, i32 %idx ret i32 %e } define i32 @load_single_extract_variable_index_v3i32_small_align(<3 x i32>* %A, i32 %idx) { -; CHECK-LABEL: load_single_extract_variable_index_v3i32_small_align -; CHECK: ldr d0, [x0] -; CHECK-NEXT: add x[[PTR_ADD:.*]], x0, #8 -; CHECK-NEXT: ld1.s { v0 }[2], [x[[PTR_ADD]]] -; CHECK-NEXT: and [[IDX_1:.*]], x1, #0x3 -; CHECK-NEXT: mov x[[IDX_2:.*]], sp -; CHECK-NEXT: str q0, [sp] -; CHECK-NEXT: bfi x[[IDX_2]], [[IDX_1]], #2, #2 -; CHECK-NEXT: ldr w0, [x[[IDX_2]]] -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ret -; +; CHECK-LABEL: load_single_extract_variable_index_v3i32_small_align: +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: add x8, x0, #8 +; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ld1.s { v0 }[2], [x8] +; CHECK-NEXT: and x8, x1, #0x3 +; CHECK-NEXT: bfi x9, x8, #2, #2 +; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: ldr w0, [x9] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret %lv = load <3 x i32>, <3 x i32>* %A, align 2 %e = extractelement <3 x i32> %lv, i32 %idx ret i32 %e } define i32 @load_single_extract_variable_index_v3i32_default_align(<3 x i32>* %A, i32 %idx) { -; CHECK-LABEL: load_single_extract_variable_index_v3i32_default_align -; CHECK: sxtw [[IDX:.*]], w1 -; CHECK-NEXT: cmp [[IDX]], #2 -; CHECK-NEXT: mov w[[TMP:.*]], #2 -; CHECK-NEXT: csel [[IDX]], [[IDX]], x[[TMP]], lo -; CHECK-NEXT: ldr w0, [x0, [[IDX]], lsl #2] -; CHECK-NEXT: ret -; +; CHECK-LABEL: load_single_extract_variable_index_v3i32_default_align: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: cmp x8, #2 +; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: ldr w0, [x0, x8, lsl #2] +; CHECK-NEXT: ret %lv = load <3 x i32>, <3 x i32>* %A %e = extractelement <3 x i32> %lv, i32 %idx ret i32 %e } define i32 @load_single_extract_valid_const_index_v3i32(<3 x i32>* %A, i32 %idx) { -; CHECK-LABEL: load_single_extract_valid_const_index_v3i32 -; CHECK: ldr w0, [x0, #8] -; CHECK-NEXT: ret -; +; CHECK-LABEL: load_single_extract_valid_const_index_v3i32: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr w0, [x0, #8] +; CHECK-NEXT: ret %lv = load <3 x i32>, <3 x i32>* %A %e = extractelement <3 x i32> %lv, i32 2 ret i32 %e } define i32 @load_single_extract_variable_index_masked_i32(<4 x i32>* %A, i32 %idx) { -; CHECK-LABEL: load_single_extract_variable_index_masked_i32 -; CHECK: and [[IDX:.*]], w1, #0x3 -; CHECK-NEXT: ldr w0, [x0, [[IDX]], uxtw #2] -; CHECK-NEXT: ret -; +; CHECK-LABEL: load_single_extract_variable_index_masked_i32: +; CHECK: ; %bb.0: +; CHECK-NEXT: and w8, w1, #0x3 +; CHECK-NEXT: ldr w0, [x0, w8, uxtw #2] +; CHECK-NEXT: ret %idx.x = and i32 %idx, 3 %lv = load <4 x i32>, <4 x i32>* %A %e = extractelement <4 x i32> %lv, i32 %idx.x @@ -6436,11 +9199,11 @@ define i32 @load_single_extract_variable_index_masked_i32(<4 x i32>* %A, i32 %id } define i32 @load_single_extract_variable_index_masked2_i32(<4 x i32>* %A, i32 %idx) { -; CHECK-LABEL: load_single_extract_variable_index_masked2_i32 -; CHECK: and [[IDX:.*]], w1, #0x1 -; CHECK-NEXT: ldr w0, [x0, [[IDX]], uxtw #2] -; CHECK-NEXT: ret -; +; CHECK-LABEL: load_single_extract_variable_index_masked2_i32: +; CHECK: ; %bb.0: +; CHECK-NEXT: and w8, w1, #0x1 +; CHECK-NEXT: ldr w0, [x0, w8, uxtw #2] +; CHECK-NEXT: ret %idx.x = and i32 %idx, 1 %lv = load <4 x i32>, <4 x i32>* %A %e = extractelement <4 x i32> %lv, i32 %idx.x diff --git a/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll b/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll index cb3e095..f9286f5 100644 --- a/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll +++ b/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll @@ -1,27 +1,40 @@ -; RUN: llc < %s -mtriple=arm64-apple-ios -aarch64-neon-syntax=apple -no-integrated-as -disable-post-ra | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm64-apple-ios -aarch64-neon-syntax=apple -no-integrated-as | FileCheck %s ; rdar://9167275 define i32 @t1() nounwind ssp { -entry: ; CHECK-LABEL: t1: -; CHECK: mov {{w[0-9]+}}, 7 +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: mov w0, 7 +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret +entry: %0 = tail call i32 asm "mov ${0:w}, 7", "=r"() nounwind ret i32 %0 } define i64 @t2() nounwind ssp { -entry: ; CHECK-LABEL: t2: -; CHECK: mov {{x[0-9]+}}, 7 +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: mov x0, 7 +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret +entry: %0 = tail call i64 asm "mov $0, 7", "=r"() nounwind ret i64 %0 } define i64 @t3() nounwind ssp { -entry: ; CHECK-LABEL: t3: -; CHECK: mov {{w[0-9]+}}, 7 +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: mov w0, 7 +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret +entry: %0 = tail call i64 asm "mov ${0:w}, 7", "=r"() nounwind ret i64 %0 } @@ -29,9 +42,14 @@ entry: ; rdar://9281206 define void @t4(i64 %op) nounwind { -entry: ; CHECK-LABEL: t4: -; CHECK: mov x0, {{x[0-9]+}}; svc #0 +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: mov x0, x8; svc #0; +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret +entry: %0 = tail call i64 asm sideeffect "mov x0, $1; svc #0;", "=r,r,r,~{x0}"(i64 %op, i64 undef) nounwind ret void } @@ -39,9 +57,13 @@ entry: ; rdar://9394290 define float @t5(float %x) nounwind { -entry: ; CHECK-LABEL: t5: -; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: fadd s0, s0, s0 +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret +entry: %0 = tail call float asm "fadd ${0:s}, ${0:s}, ${0:s}", "=w,0"(float %x) nounwind ret float %0 } @@ -49,19 +71,32 @@ entry: ; rdar://9553599 define zeroext i8 @t6(i8* %src) nounwind { -entry: ; CHECK-LABEL: t6: -; CHECK: ldtrb {{w[0-9]+}}, [{{x[0-9]+}}] +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ldtrb w8, [x0] +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: and w0, w8, #0xff +; CHECK-NEXT: ret +entry: %0 = tail call i8 asm "ldtrb ${0:w}, [$1]", "=r,r"(i8* %src) nounwind ret i8 %0 } define void @t7(i8* %f, i32 %g) nounwind { +; CHECK-LABEL: t7: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: str x0, [sp, #8] +; CHECK-NEXT: add x8, sp, #8 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: str w1, [x8] +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret entry: %f.addr = alloca i8*, align 8 store i8* %f, i8** %f.addr, align 8 - ; CHECK-LABEL: t7: - ; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}] call void asm "str ${1:w}, $0", "=*Q,r"(i8** %f.addr, i32 %g) nounwind ret void } @@ -70,144 +105,261 @@ entry: ; ARM64TargetLowering::getRegForInlineAsmConstraint() should recognize 'v' ; registers. define void @t8() nounwind ssp { -entry: ; CHECK-LABEL: t8: -; CHECK: stp {{d[0-9]+}}, {{d[0-9]+}}, [sp, #-16] +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stp d9, d8, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: nop +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ldp d9, d8, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret +entry: tail call void asm sideeffect "nop", "~{v8}"() nounwind ret void } define i32 @constraint_I(i32 %i, i32 %j) nounwind { +; CHECK-LABEL: constraint_I: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: add w8, w0, 16773120 +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: add w0, w0, 4096 +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret entry: - ; CHECK-LABEL: constraint_I: %0 = tail call i32 asm sideeffect "add ${0:w}, ${1:w}, $2", "=r,r,I"(i32 %i, i32 16773120) nounwind - ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, 16773120 %1 = tail call i32 asm sideeffect "add ${0:w}, ${1:w}, $2", "=r,r,I"(i32 %i, i32 4096) nounwind - ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, 4096 ret i32 %1 } define i32 @constraint_J(i32 %i, i32 %j, i64 %k) nounwind { +; CHECK-LABEL: constraint_J: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: sub w8, w0, -16773120 +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: sub w0, w0, -1 +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: sub x8, x2, -1 +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: sub x8, x2, -1 +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret entry: - ; CHECK-LABEL: constraint_J: %0 = tail call i32 asm sideeffect "sub ${0:w}, ${1:w}, $2", "=r,r,J"(i32 %i, i32 -16773120) nounwind - ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, -16773120 %1 = tail call i32 asm sideeffect "sub ${0:w}, ${1:w}, $2", "=r,r,J"(i32 %i, i32 -1) nounwind - ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, -1 %2 = tail call i64 asm sideeffect "sub ${0:x}, ${1:x}, $2", "=r,r,J"(i64 %k, i32 -1) nounwind - ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, -1 %3 = tail call i64 asm sideeffect "sub ${0:x}, ${1:x}, $2", "=r,r,J"(i64 %k, i64 -1) nounwind - ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, -1 ret i32 %1 } define i32 @constraint_KL(i32 %i, i32 %j) nounwind { +; CHECK-LABEL: constraint_KL: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: eor w8, w0, 255 +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: eor w0, w0, 16711680 +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret entry: - ; CHECK-LABEL: constraint_KL: %0 = tail call i32 asm sideeffect "eor ${0:w}, ${1:w}, $2", "=r,r,K"(i32 %i, i32 255) nounwind - ; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, 255 %1 = tail call i32 asm sideeffect "eor ${0:w}, ${1:w}, $2", "=r,r,L"(i32 %i, i64 16711680) nounwind - ; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, 16711680 ret i32 %1 } define i32 @constraint_MN(i32 %i, i32 %j) nounwind { +; CHECK-LABEL: constraint_MN: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: movk w8, 65535 +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: movz w0, 0 +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret entry: - ; CHECK-LABEL: constraint_MN: %0 = tail call i32 asm sideeffect "movk ${0:w}, $1", "=r,M"(i32 65535) nounwind - ; CHECK: movk {{w[0-9]+}}, 65535 %1 = tail call i32 asm sideeffect "movz ${0:w}, $1", "=r,N"(i64 0) nounwind - ; CHECK: movz {{w[0-9]+}}, 0 ret i32 %1 } define void @t9() nounwind { +; CHECK-LABEL: t9: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: mov.2d v4, v0 +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret entry: - ; CHECK-LABEL: t9: %data = alloca <2 x double>, align 16 %0 = load <2 x double>, <2 x double>* %data, align 16 call void asm sideeffect "mov.2d v4, $0\0A", "w,~{v4}"(<2 x double> %0) nounwind - ; CHECK: mov.2d v4, {{v[0-9]+}} ret void } define void @t10() nounwind { +; CHECK-LABEL: t10: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: ldr d0, [sp, #8] +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ldr z0, [x8] +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ldr q0, [x8] +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ldr d0, [x8] +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ldr s0, [x8] +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ldr h0, [x8] +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ldr b0, [x8] +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret entry: - ; CHECK-LABEL: t10: %data = alloca <2 x float>, align 8 %a = alloca [2 x float], align 4 %arraydecay = getelementptr inbounds [2 x float], [2 x float]* %a, i32 0, i32 0 %0 = load <2 x float>, <2 x float>* %data, align 8 call void asm sideeffect "ldr ${1:z}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind - ; CHECK: ldr {{z[0-9]+}}, [{{x[0-9]+}}] call void asm sideeffect "ldr ${1:q}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind - ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}] call void asm sideeffect "ldr ${1:d}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind - ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}] call void asm sideeffect "ldr ${1:s}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind - ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}] call void asm sideeffect "ldr ${1:h}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind - ; CHECK: ldr {{h[0-9]+}}, [{{x[0-9]+}}] call void asm sideeffect "ldr ${1:b}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind - ; CHECK: ldr {{b[0-9]+}}, [{{x[0-9]+}}] ret void } define void @t11() nounwind { +; CHECK-LABEL: t11: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: ldr w8, [sp, #12] +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: mov xzr, x8 +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ldr w8, [sp, #12] +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: mov wzr, w8 +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret entry: - ; CHECK-LABEL: t11: %a = alloca i32, align 4 %0 = load i32, i32* %a, align 4 call void asm sideeffect "mov ${1:x}, ${0:x}\0A", "r,i"(i32 %0, i32 0) nounwind - ; CHECK: mov xzr, {{x[0-9]+}} %1 = load i32, i32* %a, align 4 call void asm sideeffect "mov ${1:w}, ${0:w}\0A", "r,i"(i32 %1, i32 0) nounwind - ; CHECK: mov wzr, {{w[0-9]+}} ret void } define void @t12() nounwind { +; CHECK-LABEL: t12: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: mov.2d v4, v0 +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret entry: - ; CHECK-LABEL: t12: %data = alloca <4 x float>, align 16 %0 = load <4 x float>, <4 x float>* %data, align 16 call void asm sideeffect "mov.2d v4, $0\0A", "x,~{v4}"(<4 x float> %0) nounwind - ; CHECK: mov.2d v4, {{v([0-9])|(1[0-5])}} ret void } define void @t13() nounwind { +; CHECK-LABEL: t13: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: mov x4, 1311673391471656960 +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: mov x4, -4662 +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: mov x4, 4660 +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: mov x4, -71777214294589696 +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret entry: - ; CHECK-LABEL: t13: tail call void asm sideeffect "mov x4, $0\0A", "N"(i64 1311673391471656960) nounwind - ; CHECK: mov x4, 1311673391471656960 tail call void asm sideeffect "mov x4, $0\0A", "N"(i64 -4662) nounwind - ; CHECK: mov x4, -4662 tail call void asm sideeffect "mov x4, $0\0A", "N"(i64 4660) nounwind - ; CHECK: mov x4, 4660 call void asm sideeffect "mov x4, $0\0A", "N"(i64 -71777214294589696) nounwind - ; CHECK: mov x4, -71777214294589696 ret void } define void @t14() nounwind { +; CHECK-LABEL: t14: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: mov w4, 305397760 +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: mov w4, 4294962634 +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: mov w4, 4660 +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: mov w4, 4278255360 +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret entry: - ; CHECK-LABEL: t14: tail call void asm sideeffect "mov w4, $0\0A", "M"(i32 305397760) nounwind - ; CHECK: mov w4, 305397760 tail call void asm sideeffect "mov w4, $0\0A", "M"(i32 -4662) nounwind - ; CHECK: mov w4, 4294962634 tail call void asm sideeffect "mov w4, $0\0A", "M"(i32 4660) nounwind - ; CHECK: mov w4, 4660 call void asm sideeffect "mov w4, $0\0A", "M"(i32 -16711936) nounwind - ; CHECK: mov w4, 4278255360 ret void } define void @t15() nounwind { +; CHECK-LABEL: t15: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: fmov x8, d8 +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret entry: %0 = tail call double asm sideeffect "fmov $0, d8", "=r"() nounwind - ; CHECK: fmov {{x[0-9]+}}, d8 ret void } @@ -215,81 +367,134 @@ entry: define void @test_zero_reg(i32* %addr) { ; CHECK-LABEL: test_zero_reg: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: USE(xzr) +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: USE(wzr) +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: USE(w8) +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: USE(xzr), USE(xzr) +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: USE(xzr), USE(wzr) +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret tail call void asm sideeffect "USE($0)", "z"(i32 0) nounwind -; CHECK: USE(xzr) tail call void asm sideeffect "USE(${0:w})", "zr"(i32 0) -; CHECK: USE(wzr) tail call void asm sideeffect "USE(${0:w})", "zr"(i32 1) -; CHECK: mov [[VAL1:w[0-9]+]], #1 -; CHECK: USE([[VAL1]]) tail call void asm sideeffect "USE($0), USE($1)", "z,z"(i32 0, i32 0) nounwind -; CHECK: USE(xzr), USE(xzr) tail call void asm sideeffect "USE($0), USE(${1:w})", "z,z"(i32 0, i32 0) nounwind -; CHECK: USE(xzr), USE(wzr) ret void } define <2 x float> @test_vreg_64bit(<2 x float> %in) nounwind { - ; CHECK-LABEL: test_vreg_64bit: +; CHECK-LABEL: test_vreg_64bit: +; CHECK: ; %bb.0: +; CHECK-NEXT: stp d15, d14, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: fadd v14.2s, v0.2s, v0.2s +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: fmov d0, d14 +; CHECK-NEXT: ldp d15, d14, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret %1 = tail call <2 x float> asm sideeffect "fadd ${0}.2s, ${1}.2s, ${1}.2s", "={v14},w"(<2 x float> %in) nounwind - ; CHECK: fadd v14.2s, v0.2s, v0.2s ret <2 x float> %1 } define <4 x float> @test_vreg_128bit(<4 x float> %in) nounwind { - ; CHECK-LABEL: test_vreg_128bit: +; CHECK-LABEL: test_vreg_128bit: +; CHECK: ; %bb.0: +; CHECK-NEXT: stp d15, d14, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: fadd v14.4s, v0.4s, v0.4s +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: mov.16b v0, v14 +; CHECK-NEXT: ldp d15, d14, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret %1 = tail call <4 x float> asm sideeffect "fadd ${0}.4s, ${1}.4s, ${1}.4s", "={v14},w"(<4 x float> %in) nounwind - ; CHECK: fadd v14.4s, v0.4s, v0.4s ret <4 x float> %1 } define void @test_constraint_w(i32 %a) { - ; CHECK: fmov [[SREG:s[0-9]+]], {{w[0-9]+}} - ; CHECK: sqxtn h0, [[SREG]] +; CHECK-LABEL: test_constraint_w: +; CHECK: ; %bb.0: +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: sqxtn h0, s0 +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret tail call void asm sideeffect "sqxtn h0, ${0:s}\0A", "w"(i32 %a) ret void } define void @test_inline_modifier_a(i8* %ptr) nounwind { - ; CHECK-LABEL: test_inline_modifier_a: +; CHECK-LABEL: test_inline_modifier_a: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: prfm pldl1keep, [x0] +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret tail call void asm sideeffect "prfm pldl1keep, ${0:a}\0A", "r"(i8* %ptr) - ; CHECK: prfm pldl1keep, [x0] ret void } ; PR33134 define void @test_zero_address() { +; CHECK-LABEL: test_zero_address: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ldr x8, [x8] +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_zero_address -; CHECK: mov {{x[0-9]+}}, xzr -; CHECK: ldr {{x[0-9]+}}, {{[x[0-9]+]}} tail call i32 asm sideeffect "ldr $0, $1 \0A", "=r,*Q"(i32* null) ret void } ; No '#' in lane specifier define void @test_no_hash_in_lane_specifier() { -; CHECK-LABEL: test_no_hash_in_lane_specifier -; CHECK: fmla v2.4s, v0.4s, v1.s[1] -; CHECK: ret +; CHECK-LABEL: test_no_hash_in_lane_specifier: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: fmla v2.4s, v0.4s, v1.s[1] +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret tail call void asm sideeffect "fmla v2.4s, v0.4s, v1.s[$0]", "I"(i32 1) #1 ret void } + define void @test_vector_too_large_r_m(<9 x float>* nocapture readonly %0) { -; CHECK-LABEL: test_vector_too_large_r_m -; CHECK: ldr [[S:s[0-9]+]], [x0, #32] -; CHECK-DAG: ldp [[Q0:q[0-9]+]], [[Q1:q[0-9]+]], [x0] -; CHECK: str [[S]], [sp, #32] +; CHECK-LABEL: test_vector_too_large_r_m: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: ldr s0, [x0, #32] +; CHECK-NEXT: ldp q2, q1, [x0] +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: str s0, [sp, #32] +; CHECK-NEXT: stp q2, q1, [sp] +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret ; CHECK-DAG stp [[Q0]], [[Q1]], [sp] -; CHECK: ; InlineAsm Start -; entry: %m.addr = alloca <9 x float>, align 16 %m = load <9 x float>, <9 x float>* %0, align 16 @@ -300,9 +505,15 @@ entry: define void @test_o_output_constraint() { ; CHECK-LABEL: test_o_output_constraint: -; CHECK: sub sp, sp, #16 -; CHECK: add x[[REG:[0-9]+]], sp, #15 -; CHECK: mov [x[[REG]]], 7 +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: add x8, sp, #15 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: mov [x8], 7 +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret %b = alloca i8, align 1 call void asm "mov $0, 7", "=*o"(i8* %b) ret void diff --git a/llvm/test/CodeGen/AArch64/arm64-ldp.ll b/llvm/test/CodeGen/AArch64/arm64-ldp.ll index 388f18b..6abde15 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ldp.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ldp.ll @@ -1,8 +1,12 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s -; CHECK-LABEL: ldp_int -; CHECK: ldp define i32 @ldp_int(i32* %p) nounwind { +; CHECK-LABEL: ldp_int: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp w8, w9, [x0] +; CHECK-NEXT: add w0, w9, w8 +; CHECK-NEXT: ret %tmp = load i32, i32* %p, align 4 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1 %tmp1 = load i32, i32* %add.ptr, align 4 @@ -10,9 +14,12 @@ define i32 @ldp_int(i32* %p) nounwind { ret i32 %add } -; CHECK-LABEL: ldp_sext_int -; CHECK: ldpsw define i64 @ldp_sext_int(i32* %p) nounwind { +; CHECK-LABEL: ldp_sext_int: +; CHECK: // %bb.0: +; CHECK-NEXT: ldpsw x8, x9, [x0] +; CHECK-NEXT: add x0, x9, x8 +; CHECK-NEXT: ret %tmp = load i32, i32* %p, align 4 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1 %tmp1 = load i32, i32* %add.ptr, align 4 @@ -22,10 +29,14 @@ define i64 @ldp_sext_int(i32* %p) nounwind { ret i64 %add } -; CHECK-LABEL: ldp_half_sext_res0_int: -; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0] -; CHECK: sxtw x[[DST1]], w[[DST1]] define i64 @ldp_half_sext_res0_int(i32* %p) nounwind { +; CHECK-LABEL: ldp_half_sext_res0_int: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp w8, w9, [x0] +; CHECK-NEXT: // kill: def $w8 killed $w8 def $x8 +; CHECK-NEXT: sxtw x8, w8 +; CHECK-NEXT: add x0, x9, x8 +; CHECK-NEXT: ret %tmp = load i32, i32* %p, align 4 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1 %tmp1 = load i32, i32* %add.ptr, align 4 @@ -35,10 +46,14 @@ define i64 @ldp_half_sext_res0_int(i32* %p) nounwind { ret i64 %add } -; CHECK-LABEL: ldp_half_sext_res1_int: -; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0] -; CHECK: sxtw x[[DST2]], w[[DST2]] define i64 @ldp_half_sext_res1_int(i32* %p) nounwind { +; CHECK-LABEL: ldp_half_sext_res1_int: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp w8, w9, [x0] +; CHECK-NEXT: // kill: def $w9 killed $w9 def $x9 +; CHECK-NEXT: sxtw x9, w9 +; CHECK-NEXT: add x0, x9, x8 +; CHECK-NEXT: ret %tmp = load i32, i32* %p, align 4 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1 %tmp1 = load i32, i32* %add.ptr, align 4 @@ -49,9 +64,12 @@ define i64 @ldp_half_sext_res1_int(i32* %p) nounwind { } -; CHECK-LABEL: ldp_long -; CHECK: ldp define i64 @ldp_long(i64* %p) nounwind { +; CHECK-LABEL: ldp_long: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp x8, x9, [x0] +; CHECK-NEXT: add x0, x9, x8 +; CHECK-NEXT: ret %tmp = load i64, i64* %p, align 8 %add.ptr = getelementptr inbounds i64, i64* %p, i64 1 %tmp1 = load i64, i64* %add.ptr, align 8 @@ -59,9 +77,12 @@ define i64 @ldp_long(i64* %p) nounwind { ret i64 %add } -; CHECK-LABEL: ldp_float -; CHECK: ldp define float @ldp_float(float* %p) nounwind { +; CHECK-LABEL: ldp_float: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp s0, s1, [x0] +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: ret %tmp = load float, float* %p, align 4 %add.ptr = getelementptr inbounds float, float* %p, i64 1 %tmp1 = load float, float* %add.ptr, align 4 @@ -69,9 +90,12 @@ define float @ldp_float(float* %p) nounwind { ret float %add } -; CHECK-LABEL: ldp_double -; CHECK: ldp define double @ldp_double(double* %p) nounwind { +; CHECK-LABEL: ldp_double: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp d0, d1, [x0] +; CHECK-NEXT: fadd d0, d0, d1 +; CHECK-NEXT: ret %tmp = load double, double* %p, align 8 %add.ptr = getelementptr inbounds double, double* %p, i64 1 %tmp1 = load double, double* %add.ptr, align 8 @@ -79,9 +103,12 @@ define double @ldp_double(double* %p) nounwind { ret double %add } -; CHECK-LABEL: ldp_doublex2 -; CHECK: ldp define <2 x double> @ldp_doublex2(<2 x double>* %p) nounwind { +; CHECK-LABEL: ldp_doublex2: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d +; CHECK-NEXT: ret %tmp = load <2 x double>, <2 x double>* %p, align 16 %add.ptr = getelementptr inbounds <2 x double>, <2 x double>* %p, i64 1 %tmp1 = load <2 x double>, <2 x double>* %add.ptr, align 16 @@ -91,10 +118,11 @@ define <2 x double> @ldp_doublex2(<2 x double>* %p) nounwind { ; Test the load/store optimizer---combine ldurs into a ldp, if appropriate define i32 @ldur_int(i32* %a) nounwind { -; CHECK-LABEL: ldur_int -; CHECK: ldp [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8] -; CHECK-NEXT: add w{{[0-9]+}}, [[DST2]], [[DST1]] -; CHECK-NEXT: ret +; CHECK-LABEL: ldur_int: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp w9, w8, [x0, #-8] +; CHECK-NEXT: add w0, w8, w9 +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i32 -1 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i32 -2 @@ -104,10 +132,11 @@ define i32 @ldur_int(i32* %a) nounwind { } define i64 @ldur_sext_int(i32* %a) nounwind { -; CHECK-LABEL: ldur_sext_int -; CHECK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8] -; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] -; CHECK-NEXT: ret +; CHECK-LABEL: ldur_sext_int: +; CHECK: // %bb.0: +; CHECK-NEXT: ldpsw x9, x8, [x0, #-8] +; CHECK-NEXT: add x0, x8, x9 +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i32 -1 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i32 -2 @@ -119,11 +148,13 @@ define i64 @ldur_sext_int(i32* %a) nounwind { } define i64 @ldur_half_sext_int_res0(i32* %a) nounwind { -; CHECK-LABEL: ldur_half_sext_int_res0 -; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8] -; CHECK: sxtw x[[DST1]], w[[DST1]] -; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] -; CHECK-NEXT: ret +; CHECK-LABEL: ldur_half_sext_int_res0: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp w9, w8, [x0, #-8] +; CHECK-NEXT: // kill: def $w9 killed $w9 def $x9 +; CHECK-NEXT: sxtw x9, w9 +; CHECK-NEXT: add x0, x8, x9 +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i32 -1 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i32 -2 @@ -135,11 +166,13 @@ define i64 @ldur_half_sext_int_res0(i32* %a) nounwind { } define i64 @ldur_half_sext_int_res1(i32* %a) nounwind { -; CHECK-LABEL: ldur_half_sext_int_res1 -; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8] -; CHECK: sxtw x[[DST2]], w[[DST2]] -; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] -; CHECK-NEXT: ret +; CHECK-LABEL: ldur_half_sext_int_res1: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp w9, w8, [x0, #-8] +; CHECK-NEXT: // kill: def $w8 killed $w8 def $x8 +; CHECK-NEXT: sxtw x8, w8 +; CHECK-NEXT: add x0, x8, x9 +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i32 -1 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i32 -2 @@ -152,10 +185,11 @@ define i64 @ldur_half_sext_int_res1(i32* %a) nounwind { define i64 @ldur_long(i64* %a) nounwind ssp { -; CHECK-LABEL: ldur_long -; CHECK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16] -; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] -; CHECK-NEXT: ret +; CHECK-LABEL: ldur_long: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp x9, x8, [x0, #-16] +; CHECK-NEXT: add x0, x8, x9 +; CHECK-NEXT: ret %p1 = getelementptr inbounds i64, i64* %a, i64 -1 %tmp1 = load i64, i64* %p1, align 2 %p2 = getelementptr inbounds i64, i64* %a, i64 -2 @@ -165,10 +199,11 @@ define i64 @ldur_long(i64* %a) nounwind ssp { } define float @ldur_float(float* %a) { -; CHECK-LABEL: ldur_float -; CHECK: ldp [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8] -; CHECK-NEXT: fadd s{{[0-9]+}}, [[DST2]], [[DST1]] -; CHECK-NEXT: ret +; CHECK-LABEL: ldur_float: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp s1, s0, [x0, #-8] +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: ret %p1 = getelementptr inbounds float, float* %a, i64 -1 %tmp1 = load float, float* %p1, align 2 %p2 = getelementptr inbounds float, float* %a, i64 -2 @@ -178,10 +213,11 @@ define float @ldur_float(float* %a) { } define double @ldur_double(double* %a) { -; CHECK-LABEL: ldur_double -; CHECK: ldp [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16] -; CHECK-NEXT: fadd d{{[0-9]+}}, [[DST2]], [[DST1]] -; CHECK-NEXT: ret +; CHECK-LABEL: ldur_double: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp d1, d0, [x0, #-16] +; CHECK-NEXT: fadd d0, d0, d1 +; CHECK-NEXT: ret %p1 = getelementptr inbounds double, double* %a, i64 -1 %tmp1 = load double, double* %p1, align 2 %p2 = getelementptr inbounds double, double* %a, i64 -2 @@ -191,10 +227,11 @@ define double @ldur_double(double* %a) { } define <2 x double> @ldur_doublex2(<2 x double>* %a) { -; CHECK-LABEL: ldur_doublex2 -; CHECK: ldp q[[DST1:[0-9]+]], q[[DST2:[0-9]+]], [x0, #-32] -; CHECK-NEXT: fadd v{{[0-9]+}}.2d, v[[DST2]].2d, v[[DST1]].2d -; CHECK-NEXT: ret +; CHECK-LABEL: ldur_doublex2: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q1, q0, [x0, #-32] +; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d +; CHECK-NEXT: ret %p1 = getelementptr inbounds <2 x double>, <2 x double>* %a, i64 -1 %tmp1 = load <2 x double>, <2 x double>* %p1, align 2 %p2 = getelementptr inbounds <2 x double>, <2 x double>* %a, i64 -2 @@ -205,11 +242,11 @@ define <2 x double> @ldur_doublex2(<2 x double>* %a) { ; Now check some boundary conditions define i64 @pairUpBarelyIn(i64* %a) nounwind ssp { -; CHECK-LABEL: pairUpBarelyIn -; CHECK-NOT: ldur -; CHECK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256] -; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] -; CHECK-NEXT: ret +; CHECK-LABEL: pairUpBarelyIn: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp x9, x8, [x0, #-256] +; CHECK-NEXT: add x0, x8, x9 +; CHECK-NEXT: ret %p1 = getelementptr inbounds i64, i64* %a, i64 -31 %tmp1 = load i64, i64* %p1, align 2 %p2 = getelementptr inbounds i64, i64* %a, i64 -32 @@ -219,11 +256,11 @@ define i64 @pairUpBarelyIn(i64* %a) nounwind ssp { } define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp { -; CHECK-LABEL: pairUpBarelyInSext -; CHECK-NOT: ldur -; CHECK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256] -; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] -; CHECK-NEXT: ret +; CHECK-LABEL: pairUpBarelyInSext: +; CHECK: // %bb.0: +; CHECK-NEXT: ldpsw x9, x8, [x0, #-256] +; CHECK-NEXT: add x0, x8, x9 +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i64 -63 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i64 -64 @@ -235,12 +272,13 @@ define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp { } define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp { -; CHECK-LABEL: pairUpBarelyInHalfSextRes0 -; CHECK-NOT: ldur -; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256] -; CHECK: sxtw x[[DST1]], w[[DST1]] -; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] -; CHECK-NEXT: ret +; CHECK-LABEL: pairUpBarelyInHalfSextRes0: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp w9, w8, [x0, #-256] +; CHECK-NEXT: // kill: def $w9 killed $w9 def $x9 +; CHECK-NEXT: sxtw x9, w9 +; CHECK-NEXT: add x0, x8, x9 +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i64 -63 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i64 -64 @@ -252,12 +290,13 @@ define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp { } define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp { -; CHECK-LABEL: pairUpBarelyInHalfSextRes1 -; CHECK-NOT: ldur -; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256] -; CHECK: sxtw x[[DST2]], w[[DST2]] -; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] -; CHECK-NEXT: ret +; CHECK-LABEL: pairUpBarelyInHalfSextRes1: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp w9, w8, [x0, #-256] +; CHECK-NEXT: // kill: def $w8 killed $w8 def $x8 +; CHECK-NEXT: sxtw x8, w8 +; CHECK-NEXT: add x0, x8, x9 +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i64 -63 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i64 -64 @@ -269,12 +308,15 @@ define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp { } define i64 @pairUpBarelyOut(i64* %a) nounwind ssp { -; CHECK-LABEL: pairUpBarelyOut -; CHECK-NOT: ldp ; Don't be fragile about which loads or manipulations of the base register ; are used---just check that there isn't an ldp before the add -; CHECK: add -; CHECK-NEXT: ret +; CHECK-LABEL: pairUpBarelyOut: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x9, x0, #264 +; CHECK-NEXT: ldur x8, [x0, #-256] +; CHECK-NEXT: ldr x9, [x9] +; CHECK-NEXT: add x0, x8, x9 +; CHECK-NEXT: ret %p1 = getelementptr inbounds i64, i64* %a, i64 -32 %tmp1 = load i64, i64* %p1, align 2 %p2 = getelementptr inbounds i64, i64* %a, i64 -33 @@ -284,12 +326,15 @@ define i64 @pairUpBarelyOut(i64* %a) nounwind ssp { } define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp { -; CHECK-LABEL: pairUpBarelyOutSext -; CHECK-NOT: ldp ; Don't be fragile about which loads or manipulations of the base register ; are used---just check that there isn't an ldp before the add -; CHECK: add -; CHECK-NEXT: ret +; CHECK-LABEL: pairUpBarelyOutSext: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x9, x0, #260 +; CHECK-NEXT: ldursw x8, [x0, #-256] +; CHECK-NEXT: ldrsw x9, [x9] +; CHECK-NEXT: add x0, x8, x9 +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i64 -64 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i64 -65 @@ -301,12 +346,12 @@ define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp { } define i64 @pairUpNotAligned(i64* %a) nounwind ssp { -; CHECK-LABEL: pairUpNotAligned -; CHECK-NOT: ldp -; CHECK: ldur -; CHECK-NEXT: ldur -; CHECK-NEXT: add -; CHECK-NEXT: ret +; CHECK-LABEL: pairUpNotAligned: +; CHECK: // %bb.0: +; CHECK-NEXT: ldur x8, [x0, #-143] +; CHECK-NEXT: ldur x9, [x0, #-135] +; CHECK-NEXT: add x0, x8, x9 +; CHECK-NEXT: ret %p1 = getelementptr inbounds i64, i64* %a, i64 -18 %bp1 = bitcast i64* %p1 to i8* %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1 @@ -324,12 +369,12 @@ define i64 @pairUpNotAligned(i64* %a) nounwind ssp { } define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp { -; CHECK-LABEL: pairUpNotAlignedSext -; CHECK-NOT: ldp -; CHECK: ldursw -; CHECK-NEXT: ldursw -; CHECK-NEXT: add -; CHECK-NEXT: ret +; CHECK-LABEL: pairUpNotAlignedSext: +; CHECK: // %bb.0: +; CHECK-NEXT: ldursw x8, [x0, #-71] +; CHECK-NEXT: ldursw x9, [x0, #-67] +; CHECK-NEXT: add x0, x8, x9 +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i64 -18 %bp1 = bitcast i32* %p1 to i8* %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1 @@ -350,9 +395,17 @@ define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp { declare void @use-ptr(i32*) -; CHECK-LABEL: ldp_sext_int_pre -; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}, #8] define i64 @ldp_sext_int_pre(i32* %p) nounwind { +; CHECK-LABEL: ldp_sext_int_pre: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: add x0, x0, #8 +; CHECK-NEXT: bl "use-ptr" +; CHECK-NEXT: ldpsw x8, x9, [x19, #8] +; CHECK-NEXT: add x0, x9, x8 +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret %ptr = getelementptr inbounds i32, i32* %p, i64 2 call void @use-ptr(i32* %ptr) %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 0 @@ -365,9 +418,17 @@ define i64 @ldp_sext_int_pre(i32* %p) nounwind { ret i64 %add } -; CHECK-LABEL: ldp_sext_int_post -; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x0], #8 define i64 @ldp_sext_int_post(i32* %p) nounwind { +; CHECK-LABEL: ldp_sext_int_post: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ldpsw x19, x20, [x0], #8 +; CHECK-NEXT: bl "use-ptr" +; CHECK-NEXT: add x0, x20, x19 +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret %tmp = load i32, i32* %p, align 4 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1 %tmp1 = load i32, i32* %add.ptr, align 4 diff --git a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll index 0ebd3526..61837e64 100644 --- a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll +++ b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll @@ -1,61 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s define void @bzero_4_heap(i8* nocapture %c) { ; CHECK-LABEL: bzero_4_heap: -; CHECK: str wzr, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: str wzr, [x0] +; CHECK-NEXT: ret call void @llvm.memset.p0i8.i64(i8* align 4 %c, i8 0, i64 4, i1 false) ret void } define void @bzero_8_heap(i8* nocapture %c) { ; CHECK-LABEL: bzero_8_heap: -; CHECK: str xzr, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: str xzr, [x0] +; CHECK-NEXT: ret call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 8, i1 false) ret void } define void @bzero_12_heap(i8* nocapture %c) { ; CHECK-LABEL: bzero_12_heap: -; CHECK: str wzr, [x0, #8] -; CHECK-NEXT: str xzr, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: str wzr, [x0, #8] +; CHECK-NEXT: str xzr, [x0] +; CHECK-NEXT: ret call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 12, i1 false) ret void } define void @bzero_16_heap(i8* nocapture %c) { ; CHECK-LABEL: bzero_16_heap: -; CHECK: stp xzr, xzr, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: stp xzr, xzr, [x0] +; CHECK-NEXT: ret call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 16, i1 false) ret void } define void @bzero_32_heap(i8* nocapture %c) { ; CHECK-LABEL: bzero_32_heap: -; CHECK: movi v0.2d, #0000000000000000 -; CHECK-NEXT: stp q0, q0, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: stp q0, q0, [x0] +; CHECK-NEXT: ret call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 32, i1 false) ret void } define void @bzero_64_heap(i8* nocapture %c) { ; CHECK-LABEL: bzero_64_heap: -; CHECK: movi v0.2d, #0000000000000000 -; CHECK-NEXT: stp q0, q0, [x0, #32] -; CHECK-NEXT: stp q0, q0, [x0] -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: stp q0, q0, [x0, #32] +; CHECK-NEXT: stp q0, q0, [x0] +; CHECK-NEXT: ret call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 64, i1 false) ret void } define void @bzero_4_stack() { ; CHECK-LABEL: bzero_4_stack: -; CHECK: str wzr, [sp, #12] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: add x0, sp, #12 +; CHECK-NEXT: str wzr, [sp, #12] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %buf = alloca [4 x i8], align 1 %cast = bitcast [4 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 4, i1 false) @@ -65,8 +79,14 @@ define void @bzero_4_stack() { define void @bzero_8_stack() { ; CHECK-LABEL: bzero_8_stack: -; CHECK: stp x30, xzr, [sp, #-16]! -; CHECK: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: stp x30, xzr, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: add x0, sp, #8 +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %buf = alloca [8 x i8], align 1 %cast = bitcast [8 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 8, i1 false) @@ -76,9 +96,18 @@ define void @bzero_8_stack() { define void @bzero_12_stack() { ; CHECK-LABEL: bzero_12_stack: -; CHECK: str wzr, [sp, #8] -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: str wzr, [sp, #8] +; CHECK-NEXT: str xzr, [sp] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret %buf = alloca [12 x i8], align 1 %cast = bitcast [12 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 12, i1 false) @@ -88,10 +117,17 @@ define void @bzero_12_stack() { define void @bzero_16_stack() { ; CHECK-LABEL: bzero_16_stack: -; CHECK: stp xzr, x30, [sp, #8] -; CHECK: mov x0, sp -; CHECK: str xzr, [sp] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: stp xzr, x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: str xzr, [sp] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret %buf = alloca [16 x i8], align 1 %cast = bitcast [16 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 16, i1 false) @@ -101,9 +137,18 @@ define void @bzero_16_stack() { define void @bzero_20_stack() { ; CHECK-LABEL: bzero_20_stack: -; CHECK: stp xzr, xzr, [sp, #8] -; CHECK-NEXT: str wzr, [sp, #24] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: add x0, sp, #8 +; CHECK-NEXT: stp xzr, xzr, [sp, #8] +; CHECK-NEXT: str wzr, [sp, #24] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret %buf = alloca [20 x i8], align 1 %cast = bitcast [20 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 20, i1 false) @@ -113,10 +158,19 @@ define void @bzero_20_stack() { define void @bzero_26_stack() { ; CHECK-LABEL: bzero_26_stack: -; CHECK: stp xzr, xzr, [sp] -; CHECK-NEXT: strh wzr, [sp, #24] -; CHECK-NEXT: str xzr, [sp, #16] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: stp xzr, xzr, [sp] +; CHECK-NEXT: strh wzr, [sp, #24] +; CHECK-NEXT: str xzr, [sp, #16] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret %buf = alloca [26 x i8], align 1 %cast = bitcast [26 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 26, i1 false) @@ -126,10 +180,18 @@ define void @bzero_26_stack() { define void @bzero_32_stack() { ; CHECK-LABEL: bzero_32_stack: -; CHECK: movi v0.2d, #0000000000000000 -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret %buf = alloca [32 x i8], align 1 %cast = bitcast [32 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 32, i1 false) @@ -139,11 +201,19 @@ define void @bzero_32_stack() { define void @bzero_40_stack() { ; CHECK-LABEL: bzero_40_stack: -; CHECK: movi v0.2d, #0000000000000000 -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: str xzr, [sp, #32] -; CHECK-NEXT: stp q0, q0, [sp] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: str xzr, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret %buf = alloca [40 x i8], align 1 %cast = bitcast [40 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 40, i1 false) @@ -153,11 +223,19 @@ define void @bzero_40_stack() { define void @bzero_64_stack() { ; CHECK-LABEL: bzero_64_stack: -; CHECK: movi v0.2d, #0000000000000000 -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #32] -; CHECK-NEXT: stp q0, q0, [sp] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ret %buf = alloca [64 x i8], align 1 %cast = bitcast [64 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 64, i1 false) @@ -167,12 +245,20 @@ define void @bzero_64_stack() { define void @bzero_72_stack() { ; CHECK-LABEL: bzero_72_stack: -; CHECK: movi v0.2d, #0000000000000000 -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: str xzr, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] -; CHECK-NEXT: stp q0, q0, [sp] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #96 +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: str xzr, [sp, #64] +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: ret %buf = alloca [72 x i8], align 1 %cast = bitcast [72 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 72, i1 false) @@ -182,13 +268,21 @@ define void @bzero_72_stack() { define void @bzero_128_stack() { ; CHECK-LABEL: bzero_128_stack: -; CHECK: movi v0.2d, #0000000000000000 -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #96] -; CHECK-NEXT: stp q0, q0, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] -; CHECK-NEXT: stp q0, q0, [sp] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #144 +; CHECK-NEXT: str x30, [sp, #128] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 144 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: stp q0, q0, [sp, #96] +; CHECK-NEXT: stp q0, q0, [sp, #64] +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #144 +; CHECK-NEXT: ret %buf = alloca [128 x i8], align 1 %cast = bitcast [128 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 128, i1 false) @@ -198,17 +292,26 @@ define void @bzero_128_stack() { define void @bzero_256_stack() { ; CHECK-LABEL: bzero_256_stack: -; CHECK: movi v0.2d, #0000000000000000 -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #224] -; CHECK-NEXT: stp q0, q0, [sp, #192] -; CHECK-NEXT: stp q0, q0, [sp, #160] -; CHECK-NEXT: stp q0, q0, [sp, #128] -; CHECK-NEXT: stp q0, q0, [sp, #96] -; CHECK-NEXT: stp q0, q0, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] -; CHECK-NEXT: stp q0, q0, [sp] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #272 +; CHECK-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 272 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: stp q0, q0, [sp, #224] +; CHECK-NEXT: stp q0, q0, [sp, #192] +; CHECK-NEXT: stp q0, q0, [sp, #160] +; CHECK-NEXT: stp q0, q0, [sp, #128] +; CHECK-NEXT: stp q0, q0, [sp, #96] +; CHECK-NEXT: stp q0, q0, [sp, #64] +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #272 +; CHECK-NEXT: ret %buf = alloca [256 x i8], align 1 %cast = bitcast [256 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 256, i1 false) @@ -218,10 +321,16 @@ define void @bzero_256_stack() { define void @memset_4_stack() { ; CHECK-LABEL: memset_4_stack: -; CHECK: mov w8, #-1431655766 -; CHECK-NEXT: add x0, sp, #12 -; CHECK-NEXT: str w8, [sp, #12] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: mov w8, #-1431655766 +; CHECK-NEXT: add x0, sp, #12 +; CHECK-NEXT: str w8, [sp, #12] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %buf = alloca [4 x i8], align 1 %cast = bitcast [4 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 4, i1 false) @@ -231,10 +340,15 @@ define void @memset_4_stack() { define void @memset_8_stack() { ; CHECK-LABEL: memset_8_stack: -; CHECK: mov x8, #-6148914691236517206 -; CHECK-NEXT: stp x30, x8, [sp, #-16]! -; CHECK-NEXT: add x0, sp, #8 -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: mov x8, #-6148914691236517206 +; CHECK-NEXT: stp x30, x8, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: add x0, sp, #8 +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %buf = alloca [8 x i8], align 1 %cast = bitcast [8 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 8, i1 false) @@ -244,11 +358,19 @@ define void @memset_8_stack() { define void @memset_12_stack() { ; CHECK-LABEL: memset_12_stack: -; CHECK: mov x8, #-6148914691236517206 -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: str x8, [sp] -; CHECK-NEXT: str w8, [sp, #8] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: mov x8, #-6148914691236517206 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: str x8, [sp] +; CHECK-NEXT: str w8, [sp, #8] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret %buf = alloca [12 x i8], align 1 %cast = bitcast [12 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 12, i1 false) @@ -258,11 +380,18 @@ define void @memset_12_stack() { define void @memset_16_stack() { ; CHECK-LABEL: memset_16_stack: -; CHECK: mov x8, #-6148914691236517206 -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp x8, x30, [sp, #8] -; CHECK-NEXT: str x8, [sp] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: mov x8, #-6148914691236517206 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: stp x8, x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str x8, [sp] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret %buf = alloca [16 x i8], align 1 %cast = bitcast [16 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 16, i1 false) @@ -272,11 +401,19 @@ define void @memset_16_stack() { define void @memset_20_stack() { ; CHECK-LABEL: memset_20_stack: -; CHECK: mov x8, #-6148914691236517206 -; CHECK-NEXT: add x0, sp, #8 -; CHECK-NEXT: stp x8, x8, [sp, #8] -; CHECK-NEXT: str w8, [sp, #24] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: mov x8, #-6148914691236517206 +; CHECK-NEXT: add x0, sp, #8 +; CHECK-NEXT: stp x8, x8, [sp, #8] +; CHECK-NEXT: str w8, [sp, #24] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret %buf = alloca [20 x i8], align 1 %cast = bitcast [20 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 20, i1 false) @@ -286,12 +423,20 @@ define void @memset_20_stack() { define void @memset_26_stack() { ; CHECK-LABEL: memset_26_stack: -; CHECK: mov x8, #-6148914691236517206 -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp x8, x8, [sp, #8] -; CHECK-NEXT: str x8, [sp] -; CHECK-NEXT: strh w8, [sp, #24] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: mov x8, #-6148914691236517206 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: stp x8, x8, [sp, #8] +; CHECK-NEXT: str x8, [sp] +; CHECK-NEXT: strh w8, [sp, #24] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret %buf = alloca [26 x i8], align 1 %cast = bitcast [26 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 26, i1 false) @@ -301,10 +446,18 @@ define void @memset_26_stack() { define void @memset_32_stack() { ; CHECK-LABEL: memset_32_stack: -; CHECK: movi v0.16b, #170 -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: movi v0.16b, #170 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret %buf = alloca [32 x i8], align 1 %cast = bitcast [32 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 32, i1 false) @@ -314,12 +467,20 @@ define void @memset_32_stack() { define void @memset_40_stack() { ; CHECK-LABEL: memset_40_stack: -; CHECK: mov x8, #-6148914691236517206 -; CHECK-NEXT: movi v0.16b, #170 -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: str x8, [sp, #32] -; CHECK-NEXT: stp q0, q0, [sp] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: mov x8, #-6148914691236517206 +; CHECK-NEXT: movi v0.16b, #170 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: str x8, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret %buf = alloca [40 x i8], align 1 %cast = bitcast [40 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 40, i1 false) @@ -329,11 +490,19 @@ define void @memset_40_stack() { define void @memset_64_stack() { ; CHECK-LABEL: memset_64_stack: -; CHECK: movi v0.16b, #170 -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #32] -; CHECK-NEXT: stp q0, q0, [sp] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: movi v0.16b, #170 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ret %buf = alloca [64 x i8], align 1 %cast = bitcast [64 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 64, i1 false) @@ -343,13 +512,21 @@ define void @memset_64_stack() { define void @memset_72_stack() { ; CHECK-LABEL: memset_72_stack: -; CHECK: mov x8, #-6148914691236517206 -; CHECK-NEXT: movi v0.16b, #170 -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: str x8, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] -; CHECK-NEXT: stp q0, q0, [sp] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #96 +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: mov x8, #-6148914691236517206 +; CHECK-NEXT: movi v0.16b, #170 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: str x8, [sp, #64] +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: ret %buf = alloca [72 x i8], align 1 %cast = bitcast [72 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 72, i1 false) @@ -359,13 +536,21 @@ define void @memset_72_stack() { define void @memset_128_stack() { ; CHECK-LABEL: memset_128_stack: -; CHECK: movi v0.16b, #170 -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #96] -; CHECK-NEXT: stp q0, q0, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] -; CHECK-NEXT: stp q0, q0, [sp] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #144 +; CHECK-NEXT: str x30, [sp, #128] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 144 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: movi v0.16b, #170 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: stp q0, q0, [sp, #96] +; CHECK-NEXT: stp q0, q0, [sp, #64] +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #144 +; CHECK-NEXT: ret %buf = alloca [128 x i8], align 1 %cast = bitcast [128 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 128, i1 false) @@ -375,17 +560,26 @@ define void @memset_128_stack() { define void @memset_256_stack() { ; CHECK-LABEL: memset_256_stack: -; CHECK: movi v0.16b, #170 -; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #224] -; CHECK-NEXT: stp q0, q0, [sp, #192] -; CHECK-NEXT: stp q0, q0, [sp, #160] -; CHECK-NEXT: stp q0, q0, [sp, #128] -; CHECK-NEXT: stp q0, q0, [sp, #96] -; CHECK-NEXT: stp q0, q0, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] -; CHECK-NEXT: stp q0, q0, [sp] -; CHECK-NEXT: bl something +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #272 +; CHECK-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 272 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: movi v0.16b, #170 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: stp q0, q0, [sp, #224] +; CHECK-NEXT: stp q0, q0, [sp, #192] +; CHECK-NEXT: stp q0, q0, [sp, #160] +; CHECK-NEXT: stp q0, q0, [sp, #128] +; CHECK-NEXT: stp q0, q0, [sp, #96] +; CHECK-NEXT: stp q0, q0, [sp, #64] +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: bl something +; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #272 +; CHECK-NEXT: ret %buf = alloca [256 x i8], align 1 %cast = bitcast [256 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 -86, i32 256, i1 false) diff --git a/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll b/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll index cfd3d6a..5cdcbf1 100644 --- a/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll +++ b/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll @@ -1,11 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=arm64-apple-ios -mattr=+strict-align < %s | FileCheck %s ; Small (16 bytes here) unaligned memcpy() should be a function call if ; strict-alignment is turned on. define void @t0(i8* %out, i8* %in) { ; CHECK-LABEL: t0: -; CHECK: mov w2, #16 -; CHECK-NEXT: bl _memcpy +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: mov w2, #16 +; CHECK-NEXT: bl _memcpy +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret entry: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 16, i1 false) ret void diff --git a/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll b/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll index 5995de2..488ddbf 100644 --- a/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll +++ b/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll @@ -1,11 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=arm64_32-apple-ios %s -o - | FileCheck %s ; If %base < 96 then the sum will not wrap (in an unsigned sense), but "ldr w0, ; [x0, #-96]" would. define i32 @test_valid_wrap(i32 %base) { ; CHECK-LABEL: test_valid_wrap: -; CHECK: sub w[[ADDR:[0-9]+]], w0, #96 -; CHECK: ldr w0, [x[[ADDR]]] +; CHECK: ; %bb.0: +; CHECK-NEXT: sub w8, w0, #96 +; CHECK-NEXT: ldr w0, [x8] +; CHECK-NEXT: ret %newaddr = add nuw i32 %base, -96 %ptr = inttoptr i32 %newaddr to i32* @@ -15,7 +18,9 @@ define i32 @test_valid_wrap(i32 %base) { define i8 @test_valid_wrap_optimizable(i8* %base) { ; CHECK-LABEL: test_valid_wrap_optimizable: -; CHECK: ldurb w0, [x0, #-96] +; CHECK: ; %bb.0: +; CHECK-NEXT: ldurb w0, [x0, #-96] +; CHECK-NEXT: ret %newaddr = getelementptr inbounds i8, i8* %base, i32 -96 %val = load i8, i8* %newaddr @@ -24,7 +29,9 @@ define i8 @test_valid_wrap_optimizable(i8* %base) { define i8 @test_valid_wrap_optimizable1(i8* %base, i32 %offset) { ; CHECK-LABEL: test_valid_wrap_optimizable1: -; CHECK: ldrb w0, [x0, w1, sxtw] +; CHECK: ; %bb.0: +; CHECK-NEXT: ldrb w0, [x0, w1, sxtw] +; CHECK-NEXT: ret %newaddr = getelementptr inbounds i8, i8* %base, i32 %offset %val = load i8, i8* %newaddr @@ -34,9 +41,12 @@ define i8 @test_valid_wrap_optimizable1(i8* %base, i32 %offset) { ; define i8 @test_valid_wrap_optimizable2(i8* %base, i32 %offset) { ; CHECK-LABEL: test_valid_wrap_optimizable2: -; CHECK: sxtw x[[OFFSET:[0-9]+]], w1 -; CHECK: mov w[[BASE:[0-9]+]], #-100 -; CHECK: ldrb w0, [x[[OFFSET]], x[[BASE]]] +; CHECK: ; %bb.0: +; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov w9, #-100 +; CHECK-NEXT: ldrb w0, [x8, x9] +; CHECK-NEXT: ret %newaddr = getelementptr inbounds i8, i8* inttoptr(i32 -100 to i8*), i32 %offset %val = load i8, i8* %newaddr