From 2002c82278f6fad4374045b849e414327788139f Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 27 Jan 2023 13:18:59 +0000 Subject: [PATCH] AArch64: count callee stack we use when estimating scavenging requirements. --- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 13 +- llvm/test/CodeGen/AArch64/arm64-stackmap.ll | 2 +- .../test/CodeGen/AArch64/arm64-subvector-extend.ll | 232 ++-- llvm/test/CodeGen/AArch64/neon-dotreduce.ll | 1464 ++++++++++---------- llvm/test/CodeGen/AArch64/scavenge-large-call.ll | 11 + llvm/test/CodeGen/AArch64/stackmap.ll | 2 +- .../sve-fixed-length-frame-offests-crash.ll | 7 +- 7 files changed, 896 insertions(+), 835 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/scavenge-large-call.ll diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index a1b87c3..603dccd 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -3074,9 +3074,18 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // won't include them. unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF); + // We may address some of the stack above the canonical frame address, either + // for our own arguments or during a call. Include that in calculating whether + // we have complicated addressing concerns. + int64_t CalleeStackUsed = 0; + for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) { + int64_t FixedOff = MFI.getObjectOffset(I); + if (FixedOff > CalleeStackUsed) CalleeStackUsed = FixedOff; + } + // Conservatively always assume BigStack when there are SVE spills. - bool BigStack = SVEStackSize || - (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit; + bool BigStack = SVEStackSize || (EstimatedStackSize + CSStackSize + + CalleeStackUsed) > EstimatedStackSizeLimit; if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) AFI->setHasStackFrame(true); diff --git a/llvm/test/CodeGen/AArch64/arm64-stackmap.ll b/llvm/test/CodeGen/AArch64/arm64-stackmap.ll index 5036f3f..785c316 100644 --- a/llvm/test/CodeGen/AArch64/arm64-stackmap.ll +++ b/llvm/test/CodeGen/AArch64/arm64-stackmap.ll @@ -46,7 +46,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; CHECK-NEXT: .quad 160 ; CHECK-NEXT: .quad 1 ; CHECK-NEXT: .quad _spilledStackMapValue -; CHECK-NEXT: .quad 128 +; CHECK-NEXT: .quad 144 ; CHECK-NEXT: .quad 1 ; CHECK-NEXT: .quad _liveConstant ; CHECK-NEXT: .quad 16 diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll index 50dda82..7e8efb6 100644 --- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll +++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll @@ -358,122 +358,125 @@ define <32 x i8> @sext_v32i1(<32 x i1> %arg) { define <64 x i8> @zext_v64i1(<64 x i1> %arg) { ; CHECK-LABEL: zext_v64i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [sp, #320] +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ldr w8, [sp, #336] ; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp, #64] -; CHECK-NEXT: ldr w10, [sp, #192] +; CHECK-NEXT: ldr w9, [sp, #80] +; CHECK-NEXT: ldr w10, [sp, #208] ; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: ldr w8, [sp, #328] +; CHECK-NEXT: ldr w8, [sp, #344] ; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldr w9, [sp, #200] +; CHECK-NEXT: ldr w9, [sp, #216] ; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: ldr w10, [sp, #336] +; CHECK-NEXT: ldr w10, [sp, #352] ; CHECK-NEXT: mov.b v3[1], w8 -; CHECK-NEXT: ldr w8, [sp, #72] +; CHECK-NEXT: ldr w8, [sp, #88] ; CHECK-NEXT: mov.b v0[1], w1 -; CHECK-NEXT: ldr w11, [sp, #352] +; CHECK-NEXT: ldr w11, [sp, #368] ; CHECK-NEXT: mov.b v2[1], w9 -; CHECK-NEXT: ldr w9, [sp, #80] +; CHECK-NEXT: ldr w9, [sp, #96] ; CHECK-NEXT: mov.b v1[1], w8 -; CHECK-NEXT: ldr w8, [sp, #344] +; CHECK-NEXT: ldr w8, [sp, #360] ; CHECK-NEXT: mov.b v3[2], w10 -; CHECK-NEXT: ldr w10, [sp, #208] +; CHECK-NEXT: ldr w10, [sp, #224] ; CHECK-NEXT: mov.b v0[2], w2 -; CHECK-NEXT: ldr w12, [sp, #368] -; CHECK-NEXT: ldr w13, [sp, #384] +; CHECK-NEXT: ldr w12, [sp, #384] +; CHECK-NEXT: ldr w13, [sp, #400] ; CHECK-NEXT: mov.b v1[2], w9 -; CHECK-NEXT: ldr w9, [sp, #360] +; CHECK-NEXT: ldr w9, [sp, #376] ; CHECK-NEXT: mov.b v2[2], w10 -; CHECK-NEXT: ldr w10, [sp, #88] +; CHECK-NEXT: ldr w10, [sp, #104] ; CHECK-NEXT: mov.b v3[3], w8 -; CHECK-NEXT: ldr w8, [sp, #216] +; CHECK-NEXT: ldr w8, [sp, #232] ; CHECK-NEXT: mov.b v0[3], w3 -; CHECK-NEXT: ldr w14, [sp, #400] +; CHECK-NEXT: ldr w14, [sp, #416] ; CHECK-NEXT: mov.b v1[3], w10 -; CHECK-NEXT: ldr w10, [sp, #376] +; CHECK-NEXT: ldr w10, [sp, #392] ; CHECK-NEXT: mov.b v2[3], w8 -; CHECK-NEXT: ldr w8, [sp, #96] +; CHECK-NEXT: ldr w8, [sp, #112] ; CHECK-NEXT: mov.b v3[4], w11 -; CHECK-NEXT: ldr w11, [sp, #224] +; CHECK-NEXT: ldr w11, [sp, #240] ; CHECK-NEXT: mov.b v0[4], w4 -; CHECK-NEXT: ldr w15, [sp, #416] +; CHECK-NEXT: ldr w15, [sp, #432] ; CHECK-NEXT: mov.b v1[4], w8 -; CHECK-NEXT: ldr w8, [sp, #392] +; CHECK-NEXT: ldr w8, [sp, #408] ; CHECK-NEXT: mov.b v2[4], w11 -; CHECK-NEXT: ldr w11, [sp, #104] +; CHECK-NEXT: ldr w11, [sp, #120] ; CHECK-NEXT: mov.b v3[5], w9 -; CHECK-NEXT: ldr w9, [sp, #232] +; CHECK-NEXT: ldr w9, [sp, #248] ; CHECK-NEXT: mov.b v0[5], w5 -; CHECK-NEXT: ldr w16, [sp, #432] +; CHECK-NEXT: ldr w16, [sp, #448] ; CHECK-NEXT: mov.b v1[5], w11 -; CHECK-NEXT: ldr w11, [sp, #408] +; CHECK-NEXT: ldr w11, [sp, #424] ; CHECK-NEXT: mov.b v2[5], w9 -; CHECK-NEXT: ldr w9, [sp, #112] +; CHECK-NEXT: ldr w9, [sp, #128] ; CHECK-NEXT: mov.b v3[6], w12 -; CHECK-NEXT: ldr w12, [sp, #240] +; CHECK-NEXT: ldr w12, [sp, #256] ; CHECK-NEXT: mov.b v0[6], w6 ; CHECK-NEXT: mov.b v1[6], w9 -; CHECK-NEXT: ldr w9, [sp, #424] +; CHECK-NEXT: ldr w9, [sp, #440] ; CHECK-NEXT: mov.b v2[6], w12 -; CHECK-NEXT: ldr w12, [sp, #120] +; CHECK-NEXT: ldr w12, [sp, #136] ; CHECK-NEXT: mov.b v3[7], w10 -; CHECK-NEXT: ldr w10, [sp, #248] +; CHECK-NEXT: ldr w10, [sp, #264] ; CHECK-NEXT: mov.b v0[7], w7 ; CHECK-NEXT: mov.b v1[7], w12 -; CHECK-NEXT: ldr w12, [sp] +; CHECK-NEXT: ldr w12, [sp, #16] ; CHECK-NEXT: mov.b v2[7], w10 -; CHECK-NEXT: ldr w10, [sp, #128] +; CHECK-NEXT: ldr w10, [sp, #144] ; CHECK-NEXT: mov.b v3[8], w13 -; CHECK-NEXT: ldr w13, [sp, #256] +; CHECK-NEXT: ldr w13, [sp, #272] ; CHECK-NEXT: mov.b v0[8], w12 -; CHECK-NEXT: ldr w12, [sp, #440] +; CHECK-NEXT: ldr w12, [sp, #456] ; CHECK-NEXT: mov.b v1[8], w10 -; CHECK-NEXT: ldr w10, [sp, #8] +; CHECK-NEXT: ldr w10, [sp, #24] ; CHECK-NEXT: mov.b v2[8], w13 -; CHECK-NEXT: ldr w13, [sp, #136] +; CHECK-NEXT: ldr w13, [sp, #152] ; CHECK-NEXT: mov.b v3[9], w8 -; CHECK-NEXT: ldr w8, [sp, #264] +; CHECK-NEXT: ldr w8, [sp, #280] ; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #272] +; CHECK-NEXT: ldr w10, [sp, #288] ; CHECK-NEXT: mov.b v1[9], w13 -; CHECK-NEXT: ldr w13, [sp, #16] +; CHECK-NEXT: ldr w13, [sp, #32] ; CHECK-NEXT: mov.b v2[9], w8 -; CHECK-NEXT: ldr w8, [sp, #144] +; CHECK-NEXT: ldr w8, [sp, #160] ; CHECK-NEXT: mov.b v3[10], w14 -; CHECK-NEXT: ldr w14, [sp, #280] +; CHECK-NEXT: ldr w14, [sp, #296] ; CHECK-NEXT: mov.b v0[10], w13 -; CHECK-NEXT: ldr w13, [sp, #296] +; CHECK-NEXT: ldr w13, [sp, #312] ; CHECK-NEXT: mov.b v1[10], w8 -; CHECK-NEXT: ldr w8, [sp, #24] +; CHECK-NEXT: ldr w8, [sp, #40] ; CHECK-NEXT: mov.b v2[10], w10 -; CHECK-NEXT: ldr w10, [sp, #152] +; CHECK-NEXT: ldr w10, [sp, #168] ; CHECK-NEXT: mov.b v3[11], w11 -; CHECK-NEXT: ldr w11, [sp, #288] +; CHECK-NEXT: ldr w11, [sp, #304] ; CHECK-NEXT: mov.b v0[11], w8 -; CHECK-NEXT: ldr w8, [sp, #32] +; CHECK-NEXT: ldr w8, [sp, #48] ; CHECK-NEXT: mov.b v1[11], w10 -; CHECK-NEXT: ldr w10, [sp, #160] +; CHECK-NEXT: ldr w10, [sp, #176] ; CHECK-NEXT: mov.b v2[11], w14 ; CHECK-NEXT: mov.b v3[12], w15 ; CHECK-NEXT: mov.b v0[12], w8 -; CHECK-NEXT: ldr w8, [sp, #40] +; CHECK-NEXT: ldr w8, [sp, #56] ; CHECK-NEXT: mov.b v1[12], w10 -; CHECK-NEXT: ldr w10, [sp, #168] +; CHECK-NEXT: ldr w10, [sp, #184] ; CHECK-NEXT: mov.b v2[12], w11 -; CHECK-NEXT: ldr w11, [sp, #312] +; CHECK-NEXT: ldr w11, [sp, #328] ; CHECK-NEXT: mov.b v3[13], w9 -; CHECK-NEXT: ldr w9, [sp, #304] +; CHECK-NEXT: ldr w9, [sp, #320] ; CHECK-NEXT: mov.b v0[13], w8 -; CHECK-NEXT: ldr w8, [sp, #48] +; CHECK-NEXT: ldr w8, [sp, #64] ; CHECK-NEXT: mov.b v1[13], w10 -; CHECK-NEXT: ldr w10, [sp, #176] +; CHECK-NEXT: ldr w10, [sp, #192] ; CHECK-NEXT: mov.b v2[13], w13 ; CHECK-NEXT: mov.b v3[14], w16 ; CHECK-NEXT: mov.b v0[14], w8 -; CHECK-NEXT: ldr w8, [sp, #56] +; CHECK-NEXT: ldr w8, [sp, #72] ; CHECK-NEXT: mov.b v1[14], w10 ; CHECK-NEXT: mov.b v2[14], w9 -; CHECK-NEXT: ldr w9, [sp, #184] +; CHECK-NEXT: ldr w9, [sp, #200] ; CHECK-NEXT: movi.16b v4, #1 ; CHECK-NEXT: mov.b v0[15], w8 ; CHECK-NEXT: mov.b v1[15], w9 @@ -483,6 +486,7 @@ define <64 x i8> @zext_v64i1(<64 x i1> %arg) { ; CHECK-NEXT: and.16b v1, v1, v4 ; CHECK-NEXT: and.16b v2, v2, v4 ; CHECK-NEXT: and.16b v3, v3, v4 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %res = zext <64 x i1> %arg to <64 x i8> ret <64 x i8> %res @@ -491,121 +495,124 @@ define <64 x i8> @zext_v64i1(<64 x i1> %arg) { define <64 x i8> @sext_v64i1(<64 x i1> %arg) { ; CHECK-LABEL: sext_v64i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [sp, #320] +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ldr w8, [sp, #336] ; CHECK-NEXT: fmov s3, w0 -; CHECK-NEXT: ldr w9, [sp, #64] -; CHECK-NEXT: ldr w10, [sp, #192] +; CHECK-NEXT: ldr w9, [sp, #80] +; CHECK-NEXT: ldr w10, [sp, #208] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldr w8, [sp, #328] +; CHECK-NEXT: ldr w8, [sp, #344] ; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldr w9, [sp, #72] +; CHECK-NEXT: ldr w9, [sp, #88] ; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: ldr w10, [sp, #80] +; CHECK-NEXT: ldr w10, [sp, #96] ; CHECK-NEXT: mov.b v0[1], w8 -; CHECK-NEXT: ldr w8, [sp, #200] +; CHECK-NEXT: ldr w8, [sp, #216] ; CHECK-NEXT: mov.b v1[1], w9 -; CHECK-NEXT: ldr w9, [sp, #336] +; CHECK-NEXT: ldr w9, [sp, #352] ; CHECK-NEXT: mov.b v3[1], w1 -; CHECK-NEXT: ldr w11, [sp, #88] +; CHECK-NEXT: ldr w11, [sp, #104] ; CHECK-NEXT: mov.b v2[1], w8 -; CHECK-NEXT: ldr w8, [sp, #344] +; CHECK-NEXT: ldr w8, [sp, #360] ; CHECK-NEXT: mov.b v0[2], w9 -; CHECK-NEXT: ldr w9, [sp, #208] +; CHECK-NEXT: ldr w9, [sp, #224] ; CHECK-NEXT: mov.b v1[2], w10 -; CHECK-NEXT: ldr w10, [sp, #352] +; CHECK-NEXT: ldr w10, [sp, #368] ; CHECK-NEXT: mov.b v3[2], w2 -; CHECK-NEXT: ldr w12, [sp, #96] +; CHECK-NEXT: ldr w12, [sp, #112] ; CHECK-NEXT: mov.b v2[2], w9 -; CHECK-NEXT: ldr w9, [sp, #360] +; CHECK-NEXT: ldr w9, [sp, #376] ; CHECK-NEXT: mov.b v0[3], w8 -; CHECK-NEXT: ldr w8, [sp, #216] +; CHECK-NEXT: ldr w8, [sp, #232] ; CHECK-NEXT: mov.b v1[3], w11 -; CHECK-NEXT: ldr w13, [sp, #104] +; CHECK-NEXT: ldr w13, [sp, #120] ; CHECK-NEXT: mov.b v3[3], w3 -; CHECK-NEXT: ldr w11, [sp, #368] +; CHECK-NEXT: ldr w11, [sp, #384] ; CHECK-NEXT: mov.b v2[3], w8 -; CHECK-NEXT: ldr w14, [sp, #112] +; CHECK-NEXT: ldr w14, [sp, #128] ; CHECK-NEXT: mov.b v0[4], w10 -; CHECK-NEXT: ldr w10, [sp, #224] +; CHECK-NEXT: ldr w10, [sp, #240] ; CHECK-NEXT: mov.b v1[4], w12 -; CHECK-NEXT: ldr w8, [sp, #376] +; CHECK-NEXT: ldr w8, [sp, #392] ; CHECK-NEXT: mov.b v3[4], w4 -; CHECK-NEXT: ldr w15, [sp, #120] +; CHECK-NEXT: ldr w15, [sp, #136] ; CHECK-NEXT: mov.b v2[4], w10 -; CHECK-NEXT: ldr w12, [sp, #384] +; CHECK-NEXT: ldr w12, [sp, #400] ; CHECK-NEXT: mov.b v0[5], w9 -; CHECK-NEXT: ldr w9, [sp, #232] +; CHECK-NEXT: ldr w9, [sp, #248] ; CHECK-NEXT: mov.b v1[5], w13 -; CHECK-NEXT: ldr w16, [sp, #128] +; CHECK-NEXT: ldr w16, [sp, #144] ; CHECK-NEXT: mov.b v3[5], w5 -; CHECK-NEXT: ldr w10, [sp, #392] +; CHECK-NEXT: ldr w10, [sp, #408] ; CHECK-NEXT: mov.b v2[5], w9 -; CHECK-NEXT: ldr w13, [sp, #400] +; CHECK-NEXT: ldr w13, [sp, #416] ; CHECK-NEXT: mov.b v0[6], w11 -; CHECK-NEXT: ldr w11, [sp, #240] +; CHECK-NEXT: ldr w11, [sp, #256] ; CHECK-NEXT: mov.b v1[6], w14 -; CHECK-NEXT: ldr w9, [sp, #408] +; CHECK-NEXT: ldr w9, [sp, #424] ; CHECK-NEXT: mov.b v3[6], w6 -; CHECK-NEXT: ldr w14, [sp, #416] +; CHECK-NEXT: ldr w14, [sp, #432] ; CHECK-NEXT: mov.b v2[6], w11 -; CHECK-NEXT: ldr w11, [sp, #424] +; CHECK-NEXT: ldr w11, [sp, #440] ; CHECK-NEXT: mov.b v0[7], w8 -; CHECK-NEXT: ldr w8, [sp, #248] +; CHECK-NEXT: ldr w8, [sp, #264] ; CHECK-NEXT: mov.b v1[7], w15 -; CHECK-NEXT: ldr w15, [sp, #432] +; CHECK-NEXT: ldr w15, [sp, #448] ; CHECK-NEXT: mov.b v3[7], w7 ; CHECK-NEXT: mov.b v2[7], w8 -; CHECK-NEXT: ldr w8, [sp] +; CHECK-NEXT: ldr w8, [sp, #16] ; CHECK-NEXT: mov.b v0[8], w12 -; CHECK-NEXT: ldr w12, [sp, #256] +; CHECK-NEXT: ldr w12, [sp, #272] ; CHECK-NEXT: mov.b v1[8], w16 -; CHECK-NEXT: ldr w16, [sp, #440] +; CHECK-NEXT: ldr w16, [sp, #456] ; CHECK-NEXT: mov.b v3[8], w8 -; CHECK-NEXT: ldr w8, [sp, #136] +; CHECK-NEXT: ldr w8, [sp, #152] ; CHECK-NEXT: mov.b v2[8], w12 -; CHECK-NEXT: ldr w12, [sp, #8] +; CHECK-NEXT: ldr w12, [sp, #24] ; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #264] +; CHECK-NEXT: ldr w10, [sp, #280] ; CHECK-NEXT: mov.b v1[9], w8 -; CHECK-NEXT: ldr w8, [sp, #272] +; CHECK-NEXT: ldr w8, [sp, #288] ; CHECK-NEXT: mov.b v3[9], w12 -; CHECK-NEXT: ldr w12, [sp, #144] +; CHECK-NEXT: ldr w12, [sp, #160] ; CHECK-NEXT: mov.b v2[9], w10 -; CHECK-NEXT: ldr w10, [sp, #16] +; CHECK-NEXT: ldr w10, [sp, #32] ; CHECK-NEXT: mov.b v0[10], w13 -; CHECK-NEXT: ldr w13, [sp, #280] +; CHECK-NEXT: ldr w13, [sp, #296] ; CHECK-NEXT: mov.b v1[10], w12 -; CHECK-NEXT: ldr w12, [sp, #152] +; CHECK-NEXT: ldr w12, [sp, #168] ; CHECK-NEXT: mov.b v3[10], w10 -; CHECK-NEXT: ldr w10, [sp, #160] +; CHECK-NEXT: ldr w10, [sp, #176] ; CHECK-NEXT: mov.b v2[10], w8 -; CHECK-NEXT: ldr w8, [sp, #24] +; CHECK-NEXT: ldr w8, [sp, #40] ; CHECK-NEXT: mov.b v0[11], w9 -; CHECK-NEXT: ldr w9, [sp, #288] +; CHECK-NEXT: ldr w9, [sp, #304] ; CHECK-NEXT: mov.b v1[11], w12 -; CHECK-NEXT: ldr w12, [sp, #296] +; CHECK-NEXT: ldr w12, [sp, #312] ; CHECK-NEXT: mov.b v3[11], w8 -; CHECK-NEXT: ldr w8, [sp, #32] +; CHECK-NEXT: ldr w8, [sp, #48] ; CHECK-NEXT: mov.b v2[11], w13 ; CHECK-NEXT: mov.b v0[12], w14 ; CHECK-NEXT: mov.b v1[12], w10 -; CHECK-NEXT: ldr w10, [sp, #168] +; CHECK-NEXT: ldr w10, [sp, #184] ; CHECK-NEXT: mov.b v3[12], w8 -; CHECK-NEXT: ldr w8, [sp, #40] +; CHECK-NEXT: ldr w8, [sp, #56] ; CHECK-NEXT: mov.b v2[12], w9 -; CHECK-NEXT: ldr w9, [sp, #304] +; CHECK-NEXT: ldr w9, [sp, #320] ; CHECK-NEXT: mov.b v0[13], w11 -; CHECK-NEXT: ldr w11, [sp, #312] +; CHECK-NEXT: ldr w11, [sp, #328] ; CHECK-NEXT: mov.b v1[13], w10 -; CHECK-NEXT: ldr w10, [sp, #176] +; CHECK-NEXT: ldr w10, [sp, #192] ; CHECK-NEXT: mov.b v3[13], w8 -; CHECK-NEXT: ldr w8, [sp, #48] +; CHECK-NEXT: ldr w8, [sp, #64] ; CHECK-NEXT: mov.b v2[13], w12 ; CHECK-NEXT: mov.b v0[14], w15 ; CHECK-NEXT: mov.b v1[14], w10 -; CHECK-NEXT: ldr w10, [sp, #184] +; CHECK-NEXT: ldr w10, [sp, #200] ; CHECK-NEXT: mov.b v3[14], w8 -; CHECK-NEXT: ldr w8, [sp, #56] +; CHECK-NEXT: ldr w8, [sp, #72] ; CHECK-NEXT: mov.b v2[14], w9 ; CHECK-NEXT: mov.b v0[15], w16 ; CHECK-NEXT: mov.b v1[15], w10 @@ -619,6 +626,7 @@ define <64 x i8> @sext_v64i1(<64 x i1> %arg) { ; CHECK-NEXT: cmlt.16b v1, v1, #0 ; CHECK-NEXT: cmlt.16b v2, v2, #0 ; CHECK-NEXT: cmlt.16b v3, v4, #0 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %res = sext <64 x i1> %arg to <64 x i8> ret <64 x i8> %res diff --git a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll index 00b44a8..78d169d 100644 --- a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll +++ b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll @@ -664,179 +664,182 @@ entry: define i32 @test_sdot_v24i8_double(<24 x i8> %a, <24 x i8> %b, <24 x i8> %c, <24 x i8> %d) { ; CHECK-LABEL: test_sdot_v24i8_double: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr b0, [sp, #128] -; CHECK-NEXT: add x8, sp, #136 +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ldr b0, [sp, #144] +; CHECK-NEXT: add x8, sp, #152 ; CHECK-NEXT: fmov s1, w0 -; CHECK-NEXT: add x9, sp, #152 -; CHECK-NEXT: ldr b2, [sp, #256] -; CHECK-NEXT: add x11, sp, #264 +; CHECK-NEXT: add x9, sp, #168 +; CHECK-NEXT: ldr b2, [sp, #272] +; CHECK-NEXT: add x11, sp, #280 ; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #144 +; CHECK-NEXT: add x8, sp, #160 ; CHECK-NEXT: mov v1.b[1], w1 -; CHECK-NEXT: add x10, sp, #168 +; CHECK-NEXT: add x10, sp, #184 ; CHECK-NEXT: ld1 { v2.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #520 -; CHECK-NEXT: ldr b4, [sp, #512] -; CHECK-NEXT: add x12, sp, #72 +; CHECK-NEXT: add x11, sp, #536 +; CHECK-NEXT: ldr b4, [sp, #528] +; CHECK-NEXT: add x12, sp, #88 ; CHECK-NEXT: ld1 { v0.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #160 +; CHECK-NEXT: add x8, sp, #176 ; CHECK-NEXT: mov v1.b[2], w2 -; CHECK-NEXT: ldr b5, [sp, #320] +; CHECK-NEXT: ldr b5, [sp, #336] ; CHECK-NEXT: ld1 { v4.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #328 -; CHECK-NEXT: ldr b3, [sp, #64] -; CHECK-NEXT: add x13, sp, #80 +; CHECK-NEXT: add x11, sp, #344 +; CHECK-NEXT: ldr b3, [sp, #80] +; CHECK-NEXT: add x13, sp, #96 ; CHECK-NEXT: ld1 { v0.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #176 +; CHECK-NEXT: add x9, sp, #192 ; CHECK-NEXT: mov v1.b[3], w3 ; CHECK-NEXT: ld1 { v5.b }[1], [x11] ; CHECK-NEXT: ld1 { v3.b }[1], [x12] -; CHECK-NEXT: add x12, sp, #184 -; CHECK-NEXT: add x11, sp, #528 -; CHECK-NEXT: ldr b7, [sp, #640] +; CHECK-NEXT: add x12, sp, #200 +; CHECK-NEXT: add x11, sp, #544 +; CHECK-NEXT: ldr b7, [sp, #656] ; CHECK-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #272 +; CHECK-NEXT: add x8, sp, #288 ; CHECK-NEXT: mov v1.b[4], w4 -; CHECK-NEXT: ldr b16, [sp, #448] +; CHECK-NEXT: ldr b16, [sp, #464] ; CHECK-NEXT: ld1 { v3.b }[2], [x13] ; CHECK-NEXT: ld1 { v2.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #336 +; CHECK-NEXT: add x8, sp, #352 ; CHECK-NEXT: ld1 { v0.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #280 +; CHECK-NEXT: add x10, sp, #296 ; CHECK-NEXT: mov v1.b[5], w5 ; CHECK-NEXT: ld1 { v4.b }[2], [x11] ; CHECK-NEXT: ld1 { v5.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #344 +; CHECK-NEXT: add x8, sp, #360 ; CHECK-NEXT: ld1 { v2.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #192 +; CHECK-NEXT: add x10, sp, #208 ; CHECK-NEXT: ld1 { v0.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #88 +; CHECK-NEXT: add x9, sp, #104 ; CHECK-NEXT: mov v1.b[6], w6 -; CHECK-NEXT: add x11, sp, #536 +; CHECK-NEXT: add x11, sp, #552 ; CHECK-NEXT: ld1 { v5.b }[3], [x8] -; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ld1 { v3.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #288 +; CHECK-NEXT: add x9, sp, #304 ; CHECK-NEXT: ld1 { v0.b }[7], [x12] ; CHECK-NEXT: mov v1.b[7], w7 ; CHECK-NEXT: ld1 { v4.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #96 +; CHECK-NEXT: add x11, sp, #112 ; CHECK-NEXT: ld1 { v2.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #544 +; CHECK-NEXT: add x9, sp, #560 ; CHECK-NEXT: ld1 { v0.b }[8], [x10] -; CHECK-NEXT: add x10, sp, #200 +; CHECK-NEXT: add x10, sp, #216 ; CHECK-NEXT: ld1 { v1.b }[8], [x8] -; CHECK-NEXT: add x8, sp, #8 +; CHECK-NEXT: add x8, sp, #24 ; CHECK-NEXT: ld1 { v3.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #352 +; CHECK-NEXT: add x11, sp, #368 ; CHECK-NEXT: ld1 { v4.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #296 +; CHECK-NEXT: add x9, sp, #312 ; CHECK-NEXT: ld1 { v0.b }[9], [x10] -; CHECK-NEXT: add x10, sp, #208 +; CHECK-NEXT: add x10, sp, #224 ; CHECK-NEXT: ld1 { v1.b }[9], [x8] -; CHECK-NEXT: add x8, sp, #16 +; CHECK-NEXT: add x8, sp, #32 ; CHECK-NEXT: ld1 { v5.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #104 +; CHECK-NEXT: add x11, sp, #120 ; CHECK-NEXT: ld1 { v2.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #552 +; CHECK-NEXT: add x9, sp, #568 ; CHECK-NEXT: ld1 { v0.b }[10], [x10] -; CHECK-NEXT: add x10, sp, #216 +; CHECK-NEXT: add x10, sp, #232 ; CHECK-NEXT: ld1 { v3.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #360 +; CHECK-NEXT: add x11, sp, #376 ; CHECK-NEXT: ld1 { v1.b }[10], [x8] -; CHECK-NEXT: add x8, sp, #24 +; CHECK-NEXT: add x8, sp, #40 ; CHECK-NEXT: ld1 { v4.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #304 +; CHECK-NEXT: add x9, sp, #320 ; CHECK-NEXT: ld1 { v0.b }[11], [x10] -; CHECK-NEXT: add x10, sp, #224 +; CHECK-NEXT: add x10, sp, #240 ; CHECK-NEXT: ld1 { v5.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #112 +; CHECK-NEXT: add x11, sp, #128 ; CHECK-NEXT: ld1 { v1.b }[11], [x8] -; CHECK-NEXT: add x8, sp, #32 +; CHECK-NEXT: add x8, sp, #48 ; CHECK-NEXT: ld1 { v2.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #560 +; CHECK-NEXT: add x9, sp, #576 ; CHECK-NEXT: ld1 { v0.b }[12], [x10] -; CHECK-NEXT: add x10, sp, #232 +; CHECK-NEXT: add x10, sp, #248 ; CHECK-NEXT: ld1 { v3.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #368 +; CHECK-NEXT: add x11, sp, #384 ; CHECK-NEXT: ld1 { v1.b }[12], [x8] -; CHECK-NEXT: add x8, sp, #40 +; CHECK-NEXT: add x8, sp, #56 ; CHECK-NEXT: ld1 { v4.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #312 +; CHECK-NEXT: add x9, sp, #328 ; CHECK-NEXT: ld1 { v5.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #568 +; CHECK-NEXT: add x11, sp, #584 ; CHECK-NEXT: ld1 { v0.b }[13], [x10] -; CHECK-NEXT: add x10, sp, #240 +; CHECK-NEXT: add x10, sp, #256 ; CHECK-NEXT: ld1 { v1.b }[13], [x8] -; CHECK-NEXT: add x8, sp, #48 +; CHECK-NEXT: add x8, sp, #64 ; CHECK-NEXT: ld1 { v2.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #376 +; CHECK-NEXT: add x9, sp, #392 ; CHECK-NEXT: ld1 { v4.b }[7], [x11] -; CHECK-NEXT: add x11, sp, #576 +; CHECK-NEXT: add x11, sp, #592 ; CHECK-NEXT: ld1 { v0.b }[14], [x10] -; CHECK-NEXT: add x10, sp, #248 +; CHECK-NEXT: add x10, sp, #264 ; CHECK-NEXT: ld1 { v5.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #120 +; CHECK-NEXT: add x9, sp, #136 ; CHECK-NEXT: ld1 { v1.b }[14], [x8] -; CHECK-NEXT: add x8, sp, #56 +; CHECK-NEXT: add x8, sp, #72 ; CHECK-NEXT: ld1 { v4.b }[8], [x11] -; CHECK-NEXT: add x11, sp, #384 +; CHECK-NEXT: add x11, sp, #400 ; CHECK-NEXT: ld1 { v0.b }[15], [x10] -; CHECK-NEXT: add x10, sp, #584 +; CHECK-NEXT: add x10, sp, #600 ; CHECK-NEXT: ld1 { v3.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #648 +; CHECK-NEXT: add x9, sp, #664 ; CHECK-NEXT: ld1 { v5.b }[8], [x11] -; CHECK-NEXT: add x11, sp, #592 +; CHECK-NEXT: add x11, sp, #608 ; CHECK-NEXT: ld1 { v1.b }[15], [x8] -; CHECK-NEXT: add x8, sp, #392 +; CHECK-NEXT: add x8, sp, #408 ; CHECK-NEXT: ld1 { v4.b }[9], [x10] -; CHECK-NEXT: add x10, sp, #456 +; CHECK-NEXT: add x10, sp, #472 ; CHECK-NEXT: ld1 { v7.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #656 +; CHECK-NEXT: add x9, sp, #672 ; CHECK-NEXT: ld1 { v5.b }[9], [x8] -; CHECK-NEXT: add x8, sp, #400 +; CHECK-NEXT: add x8, sp, #416 ; CHECK-NEXT: ld1 { v16.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #464 +; CHECK-NEXT: add x10, sp, #480 ; CHECK-NEXT: ld1 { v4.b }[10], [x11] -; CHECK-NEXT: add x11, sp, #600 +; CHECK-NEXT: add x11, sp, #616 ; CHECK-NEXT: ld1 { v7.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #664 +; CHECK-NEXT: add x9, sp, #680 ; CHECK-NEXT: ld1 { v5.b }[10], [x8] -; CHECK-NEXT: add x8, sp, #408 +; CHECK-NEXT: add x8, sp, #424 ; CHECK-NEXT: ld1 { v16.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #472 +; CHECK-NEXT: add x10, sp, #488 ; CHECK-NEXT: ld1 { v4.b }[11], [x11] -; CHECK-NEXT: add x11, sp, #608 +; CHECK-NEXT: add x11, sp, #624 ; CHECK-NEXT: ld1 { v7.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #672 +; CHECK-NEXT: add x9, sp, #688 ; CHECK-NEXT: ld1 { v5.b }[11], [x8] -; CHECK-NEXT: add x8, sp, #416 +; CHECK-NEXT: add x8, sp, #432 ; CHECK-NEXT: ld1 { v16.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #480 +; CHECK-NEXT: add x10, sp, #496 ; CHECK-NEXT: ld1 { v4.b }[12], [x11] -; CHECK-NEXT: add x11, sp, #616 +; CHECK-NEXT: add x11, sp, #632 ; CHECK-NEXT: ld1 { v7.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #680 +; CHECK-NEXT: add x9, sp, #696 ; CHECK-NEXT: ld1 { v5.b }[12], [x8] -; CHECK-NEXT: add x8, sp, #424 +; CHECK-NEXT: add x8, sp, #440 ; CHECK-NEXT: ld1 { v16.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #488 +; CHECK-NEXT: add x10, sp, #504 ; CHECK-NEXT: ld1 { v4.b }[13], [x11] -; CHECK-NEXT: add x11, sp, #624 +; CHECK-NEXT: add x11, sp, #640 ; CHECK-NEXT: ld1 { v7.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #688 +; CHECK-NEXT: add x9, sp, #704 ; CHECK-NEXT: ld1 { v5.b }[13], [x8] -; CHECK-NEXT: add x8, sp, #432 +; CHECK-NEXT: add x8, sp, #448 ; CHECK-NEXT: ld1 { v16.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #496 +; CHECK-NEXT: add x10, sp, #512 ; CHECK-NEXT: ld1 { v4.b }[14], [x11] -; CHECK-NEXT: add x11, sp, #632 +; CHECK-NEXT: add x11, sp, #648 ; CHECK-NEXT: ld1 { v7.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #696 +; CHECK-NEXT: add x9, sp, #712 ; CHECK-NEXT: ld1 { v5.b }[14], [x8] -; CHECK-NEXT: add x8, sp, #440 +; CHECK-NEXT: add x8, sp, #456 ; CHECK-NEXT: ld1 { v16.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #504 +; CHECK-NEXT: add x10, sp, #520 ; CHECK-NEXT: movi v6.2d, #0000000000000000 ; CHECK-NEXT: ld1 { v4.b }[15], [x11] ; CHECK-NEXT: movi v17.2d, #0000000000000000 @@ -860,6 +863,7 @@ define i32 @test_sdot_v24i8_double(<24 x i8> %a, <24 x i8> %b, <24 x i8> %c, <24 ; CHECK-NEXT: add w8, w8, w9 ; CHECK-NEXT: add w9, w10, w11 ; CHECK-NEXT: add w0, w8, w9 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %az = sext <24 x i8> %a to <24 x i32> @@ -877,93 +881,96 @@ entry: define i32 @test_sdot_v24i8_double_nomla(<24 x i8> %a, <24 x i8> %b, <24 x i8> %c, <24 x i8> %d) { ; CHECK-LABEL: test_sdot_v24i8_double_nomla: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr b1, [sp, #320] -; CHECK-NEXT: add x8, sp, #328 -; CHECK-NEXT: add x9, sp, #376 -; CHECK-NEXT: add x10, sp, #8 -; CHECK-NEXT: ldr b2, [sp, #64] +; CHECK-NEXT: ldr b1, [sp, #336] +; CHECK-NEXT: add x8, sp, #344 +; CHECK-NEXT: add x9, sp, #392 +; CHECK-NEXT: add x10, sp, #24 +; CHECK-NEXT: ldr b2, [sp, #80] ; CHECK-NEXT: mov v0.b[1], w1 -; CHECK-NEXT: add x13, sp, #72 +; CHECK-NEXT: add x13, sp, #88 ; CHECK-NEXT: ld1 { v1.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #336 -; CHECK-NEXT: ldr b3, [sp, #448] -; CHECK-NEXT: add x14, sp, #456 +; CHECK-NEXT: add x8, sp, #352 +; CHECK-NEXT: ldr b3, [sp, #464] +; CHECK-NEXT: add x14, sp, #472 ; CHECK-NEXT: ld1 { v2.b }[1], [x13] -; CHECK-NEXT: add x11, sp, #16 +; CHECK-NEXT: add x11, sp, #32 ; CHECK-NEXT: mov v0.b[2], w2 -; CHECK-NEXT: add x13, sp, #80 +; CHECK-NEXT: add x13, sp, #96 ; CHECK-NEXT: ld1 { v1.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #344 +; CHECK-NEXT: add x8, sp, #360 ; CHECK-NEXT: ld1 { v3.b }[1], [x14] -; CHECK-NEXT: add x14, sp, #464 +; CHECK-NEXT: add x14, sp, #480 ; CHECK-NEXT: ld1 { v2.b }[2], [x13] -; CHECK-NEXT: add x12, sp, #24 +; CHECK-NEXT: add x12, sp, #40 ; CHECK-NEXT: mov v0.b[3], w3 -; CHECK-NEXT: add x13, sp, #88 +; CHECK-NEXT: add x13, sp, #104 ; CHECK-NEXT: ld1 { v1.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #352 +; CHECK-NEXT: add x8, sp, #368 ; CHECK-NEXT: ld1 { v3.b }[2], [x14] -; CHECK-NEXT: add x14, sp, #472 +; CHECK-NEXT: add x14, sp, #488 ; CHECK-NEXT: ld1 { v2.b }[3], [x13] -; CHECK-NEXT: add x13, sp, #96 +; CHECK-NEXT: add x13, sp, #112 ; CHECK-NEXT: mov v0.b[4], w4 ; CHECK-NEXT: movi v6.8b, #1 ; CHECK-NEXT: ld1 { v1.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #360 +; CHECK-NEXT: add x8, sp, #376 ; CHECK-NEXT: ld1 { v3.b }[3], [x14] -; CHECK-NEXT: add x14, sp, #480 +; CHECK-NEXT: add x14, sp, #496 ; CHECK-NEXT: ld1 { v2.b }[4], [x13] -; CHECK-NEXT: add x13, sp, #104 +; CHECK-NEXT: add x13, sp, #120 ; CHECK-NEXT: mov v0.b[5], w5 ; CHECK-NEXT: ld1 { v1.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #368 +; CHECK-NEXT: add x8, sp, #384 ; CHECK-NEXT: ld1 { v3.b }[4], [x14] -; CHECK-NEXT: add x14, sp, #488 +; CHECK-NEXT: add x14, sp, #504 ; CHECK-NEXT: ld1 { v2.b }[5], [x13] -; CHECK-NEXT: add x13, sp, #496 +; CHECK-NEXT: add x13, sp, #512 ; CHECK-NEXT: mov v0.b[6], w6 ; CHECK-NEXT: ld1 { v1.b }[6], [x8] -; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ld1 { v3.b }[5], [x14] ; CHECK-NEXT: movi v4.16b, #1 ; CHECK-NEXT: mov v0.b[7], w7 ; CHECK-NEXT: ld1 { v1.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #384 +; CHECK-NEXT: add x9, sp, #400 ; CHECK-NEXT: ld1 { v3.b }[6], [x13] ; CHECK-NEXT: movi v5.2d, #0000000000000000 ; CHECK-NEXT: ld1 { v0.b }[8], [x8] -; CHECK-NEXT: add x8, sp, #32 +; CHECK-NEXT: add x8, sp, #48 ; CHECK-NEXT: ld1 { v1.b }[8], [x9] -; CHECK-NEXT: add x9, sp, #392 +; CHECK-NEXT: add x9, sp, #408 ; CHECK-NEXT: movi v7.2d, #0000000000000000 ; CHECK-NEXT: movi v16.2d, #0000000000000000 ; CHECK-NEXT: ld1 { v0.b }[9], [x10] -; CHECK-NEXT: add x10, sp, #40 +; CHECK-NEXT: add x10, sp, #56 ; CHECK-NEXT: ld1 { v1.b }[9], [x9] -; CHECK-NEXT: add x9, sp, #400 +; CHECK-NEXT: add x9, sp, #416 ; CHECK-NEXT: movi v17.2d, #0000000000000000 ; CHECK-NEXT: ld1 { v0.b }[10], [x11] -; CHECK-NEXT: add x11, sp, #48 +; CHECK-NEXT: add x11, sp, #64 ; CHECK-NEXT: ld1 { v1.b }[10], [x9] -; CHECK-NEXT: add x9, sp, #408 +; CHECK-NEXT: add x9, sp, #424 ; CHECK-NEXT: ld1 { v0.b }[11], [x12] -; CHECK-NEXT: add x12, sp, #56 +; CHECK-NEXT: add x12, sp, #72 ; CHECK-NEXT: ld1 { v1.b }[11], [x9] -; CHECK-NEXT: add x9, sp, #416 +; CHECK-NEXT: add x9, sp, #432 ; CHECK-NEXT: ld1 { v0.b }[12], [x8] -; CHECK-NEXT: add x8, sp, #424 +; CHECK-NEXT: add x8, sp, #440 ; CHECK-NEXT: ld1 { v1.b }[12], [x9] -; CHECK-NEXT: add x9, sp, #112 +; CHECK-NEXT: add x9, sp, #128 ; CHECK-NEXT: ld1 { v0.b }[13], [x10] -; CHECK-NEXT: add x10, sp, #120 +; CHECK-NEXT: add x10, sp, #136 ; CHECK-NEXT: ld1 { v1.b }[13], [x8] -; CHECK-NEXT: add x8, sp, #432 +; CHECK-NEXT: add x8, sp, #448 ; CHECK-NEXT: ld1 { v2.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #504 +; CHECK-NEXT: add x9, sp, #520 ; CHECK-NEXT: ld1 { v0.b }[14], [x11] ; CHECK-NEXT: ld1 { v1.b }[14], [x8] -; CHECK-NEXT: add x8, sp, #440 +; CHECK-NEXT: add x8, sp, #456 ; CHECK-NEXT: ld1 { v2.b }[7], [x10] ; CHECK-NEXT: ld1 { v3.b }[7], [x9] ; CHECK-NEXT: ld1 { v0.b }[15], [x12] @@ -983,6 +990,7 @@ define i32 @test_sdot_v24i8_double_nomla(<24 x i8> %a, <24 x i8> %b, <24 x i8> % ; CHECK-NEXT: add w8, w9, w8 ; CHECK-NEXT: add w9, w10, w11 ; CHECK-NEXT: add w0, w8, w9 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %az = sext <24 x i8> %a to <24 x i32> @@ -1104,202 +1112,205 @@ entry: define i32 @test_sdot_v25i8_double(<25 x i8> %a, <25 x i8> %b, <25 x i8> %c, <25 x i8> %d) { ; CHECK-LABEL: test_sdot_v25i8_double: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr b2, [sp, #64] -; CHECK-NEXT: add x8, sp, #72 -; CHECK-NEXT: ldr b0, [sp] -; CHECK-NEXT: add x9, sp, #80 -; CHECK-NEXT: ldr b4, [sp, #264] -; CHECK-NEXT: add x10, sp, #272 +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ldr b2, [sp, #80] +; CHECK-NEXT: add x8, sp, #88 +; CHECK-NEXT: ldr b0, [sp, #16] +; CHECK-NEXT: add x9, sp, #96 +; CHECK-NEXT: ldr b4, [sp, #280] +; CHECK-NEXT: add x10, sp, #288 ; CHECK-NEXT: ld1 { v2.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #8 -; CHECK-NEXT: add x11, sp, #288 -; CHECK-NEXT: add x12, sp, #40 +; CHECK-NEXT: add x8, sp, #24 +; CHECK-NEXT: add x11, sp, #304 +; CHECK-NEXT: add x12, sp, #56 ; CHECK-NEXT: ld1 { v4.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #280 +; CHECK-NEXT: add x10, sp, #296 ; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #88 +; CHECK-NEXT: add x8, sp, #104 ; CHECK-NEXT: ld1 { v2.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #16 -; CHECK-NEXT: ldr b1, [sp, #200] +; CHECK-NEXT: add x9, sp, #32 +; CHECK-NEXT: ldr b1, [sp, #216] ; CHECK-NEXT: fmov s6, w0 ; CHECK-NEXT: ld1 { v4.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #112 +; CHECK-NEXT: add x10, sp, #128 ; CHECK-NEXT: ld1 { v0.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #96 +; CHECK-NEXT: add x9, sp, #112 ; CHECK-NEXT: ld1 { v2.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #24 -; CHECK-NEXT: ldr b16, [sp, #136] +; CHECK-NEXT: add x8, sp, #40 +; CHECK-NEXT: ldr b16, [sp, #152] ; CHECK-NEXT: ld1 { v4.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #208 +; CHECK-NEXT: add x11, sp, #224 ; CHECK-NEXT: ld1 { v0.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #104 +; CHECK-NEXT: add x8, sp, #120 ; CHECK-NEXT: ld1 { v2.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #32 +; CHECK-NEXT: add x9, sp, #48 ; CHECK-NEXT: ld1 { v1.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #312 -; CHECK-NEXT: ldr b3, [sp, #464] +; CHECK-NEXT: add x11, sp, #328 +; CHECK-NEXT: ldr b3, [sp, #480] ; CHECK-NEXT: ld1 { v0.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #48 +; CHECK-NEXT: add x9, sp, #64 ; CHECK-NEXT: ld1 { v2.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #120 +; CHECK-NEXT: add x8, sp, #136 ; CHECK-NEXT: mov v6.b[1], w1 -; CHECK-NEXT: ldr b7, [sp, #336] -; CHECK-NEXT: ldr b19, [sp, #536] +; CHECK-NEXT: ldr b7, [sp, #352] +; CHECK-NEXT: ldr b19, [sp, #552] ; CHECK-NEXT: ld1 { v0.b }[5], [x12] -; CHECK-NEXT: add x12, sp, #56 +; CHECK-NEXT: add x12, sp, #72 ; CHECK-NEXT: ld1 { v2.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #144 +; CHECK-NEXT: add x10, sp, #160 ; CHECK-NEXT: mov v6.b[2], w2 -; CHECK-NEXT: ldr b5, [sp, #128] -; CHECK-NEXT: ldr b17, [sp, #328] +; CHECK-NEXT: ldr b5, [sp, #144] +; CHECK-NEXT: ldr b17, [sp, #344] ; CHECK-NEXT: ld1 { v0.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #216 +; CHECK-NEXT: add x9, sp, #232 ; CHECK-NEXT: ld1 { v16.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #296 +; CHECK-NEXT: add x10, sp, #312 ; CHECK-NEXT: ld1 { v2.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #152 +; CHECK-NEXT: add x8, sp, #168 ; CHECK-NEXT: ld1 { v1.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #224 +; CHECK-NEXT: add x9, sp, #240 ; CHECK-NEXT: ld1 { v4.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #304 +; CHECK-NEXT: add x10, sp, #320 ; CHECK-NEXT: ld1 { v16.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #160 +; CHECK-NEXT: add x8, sp, #176 ; CHECK-NEXT: mov v6.b[3], w3 -; CHECK-NEXT: ldr b20, [sp, #528] +; CHECK-NEXT: ldr b20, [sp, #544] ; CHECK-NEXT: ld1 { v1.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #232 +; CHECK-NEXT: add x9, sp, #248 ; CHECK-NEXT: ld1 { v4.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #472 +; CHECK-NEXT: add x10, sp, #488 ; CHECK-NEXT: ld1 { v16.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #168 +; CHECK-NEXT: add x8, sp, #184 ; CHECK-NEXT: mov v6.b[4], w4 ; CHECK-NEXT: ld1 { v0.b }[7], [x12] ; CHECK-NEXT: ld1 { v1.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #240 +; CHECK-NEXT: add x9, sp, #256 ; CHECK-NEXT: ld1 { v3.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #480 +; CHECK-NEXT: add x10, sp, #496 ; CHECK-NEXT: ld1 { v16.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #176 +; CHECK-NEXT: add x8, sp, #192 ; CHECK-NEXT: mov v6.b[5], w5 ; CHECK-NEXT: ld1 { v4.b }[6], [x11] ; CHECK-NEXT: ld1 { v1.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #248 +; CHECK-NEXT: add x9, sp, #264 ; CHECK-NEXT: ld1 { v3.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #488 +; CHECK-NEXT: add x10, sp, #504 ; CHECK-NEXT: ld1 { v16.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #184 +; CHECK-NEXT: add x8, sp, #200 ; CHECK-NEXT: mov v6.b[6], w6 -; CHECK-NEXT: add x11, sp, #320 +; CHECK-NEXT: add x11, sp, #336 ; CHECK-NEXT: ld1 { v1.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #256 +; CHECK-NEXT: add x9, sp, #272 ; CHECK-NEXT: ld1 { v3.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #512 +; CHECK-NEXT: add x10, sp, #528 ; CHECK-NEXT: ld1 { v16.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #496 +; CHECK-NEXT: add x8, sp, #512 ; CHECK-NEXT: mov v6.b[7], w7 ; CHECK-NEXT: ld1 { v4.b }[7], [x11] ; CHECK-NEXT: ld1 { v1.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #192 +; CHECK-NEXT: add x9, sp, #208 ; CHECK-NEXT: ld1 { v3.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #504 +; CHECK-NEXT: add x8, sp, #520 ; CHECK-NEXT: sshll v5.8h, v5.8b, #0 -; CHECK-NEXT: add x11, sp, #672 +; CHECK-NEXT: add x11, sp, #688 ; CHECK-NEXT: ld1 { v16.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #344 +; CHECK-NEXT: add x9, sp, #360 ; CHECK-NEXT: sshll v6.8h, v6.8b, #0 ; CHECK-NEXT: ld1 { v3.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #352 +; CHECK-NEXT: add x8, sp, #368 ; CHECK-NEXT: ld1 { v7.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #544 +; CHECK-NEXT: add x9, sp, #560 ; CHECK-NEXT: sshll v16.8h, v16.8b, #0 ; CHECK-NEXT: sshll v17.8h, v17.8b, #0 ; CHECK-NEXT: ld1 { v19.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #360 +; CHECK-NEXT: add x9, sp, #376 ; CHECK-NEXT: ld1 { v7.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #552 +; CHECK-NEXT: add x8, sp, #568 ; CHECK-NEXT: smull2 v18.4s, v6.8h, v16.8h ; CHECK-NEXT: ld1 { v3.b }[6], [x10] ; CHECK-NEXT: smull v6.4s, v6.4h, v16.4h -; CHECK-NEXT: ldr b16, [sp, #400] +; CHECK-NEXT: ldr b16, [sp, #416] ; CHECK-NEXT: ld1 { v19.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #560 +; CHECK-NEXT: add x8, sp, #576 ; CHECK-NEXT: ld1 { v7.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #368 -; CHECK-NEXT: add x10, sp, #408 +; CHECK-NEXT: add x9, sp, #384 +; CHECK-NEXT: add x10, sp, #424 ; CHECK-NEXT: sshll v2.8h, v2.8b, #0 ; CHECK-NEXT: ld1 { v19.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #568 +; CHECK-NEXT: add x8, sp, #584 ; CHECK-NEXT: ld1 { v7.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #376 +; CHECK-NEXT: add x9, sp, #392 ; CHECK-NEXT: sshll v4.8h, v4.8b, #0 ; CHECK-NEXT: ld1 { v16.b }[1], [x10] ; CHECK-NEXT: smull v5.4s, v5.4h, v17.4h -; CHECK-NEXT: ldr b17, [sp, #664] +; CHECK-NEXT: ldr b17, [sp, #680] ; CHECK-NEXT: ld1 { v19.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #576 +; CHECK-NEXT: add x8, sp, #592 ; CHECK-NEXT: ld1 { v7.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #384 -; CHECK-NEXT: add x10, sp, #416 +; CHECK-NEXT: add x9, sp, #400 +; CHECK-NEXT: add x10, sp, #432 ; CHECK-NEXT: ld1 { v17.b }[1], [x11] ; CHECK-NEXT: smlal v6.4s, v2.4h, v4.4h -; CHECK-NEXT: add x11, sp, #680 +; CHECK-NEXT: add x11, sp, #696 ; CHECK-NEXT: ld1 { v19.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #584 +; CHECK-NEXT: add x8, sp, #600 ; CHECK-NEXT: ld1 { v7.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #392 +; CHECK-NEXT: add x9, sp, #408 ; CHECK-NEXT: smlal2 v18.4s, v2.8h, v4.8h -; CHECK-NEXT: ldr b2, [sp, #600] +; CHECK-NEXT: ldr b2, [sp, #616] ; CHECK-NEXT: ld1 { v16.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #424 +; CHECK-NEXT: add x10, sp, #440 ; CHECK-NEXT: ld1 { v19.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #592 +; CHECK-NEXT: add x8, sp, #608 ; CHECK-NEXT: ld1 { v7.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #608 +; CHECK-NEXT: add x9, sp, #624 ; CHECK-NEXT: ld1 { v17.b }[2], [x11] -; CHECK-NEXT: add x11, sp, #520 +; CHECK-NEXT: add x11, sp, #536 ; CHECK-NEXT: ld1 { v16.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #688 +; CHECK-NEXT: add x10, sp, #704 ; CHECK-NEXT: ld1 { v2.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #616 +; CHECK-NEXT: add x9, sp, #632 ; CHECK-NEXT: ld1 { v19.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #432 +; CHECK-NEXT: add x8, sp, #448 ; CHECK-NEXT: ld1 { v17.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #696 +; CHECK-NEXT: add x10, sp, #712 ; CHECK-NEXT: sshll v4.8h, v7.8b, #0 ; CHECK-NEXT: ld1 { v3.b }[7], [x11] ; CHECK-NEXT: ld1 { v2.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #624 +; CHECK-NEXT: add x9, sp, #640 ; CHECK-NEXT: sshll v7.8h, v19.8b, #0 ; CHECK-NEXT: ld1 { v16.b }[4], [x8] ; CHECK-NEXT: ld1 { v17.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #704 +; CHECK-NEXT: add x10, sp, #720 ; CHECK-NEXT: smull2 v19.4s, v4.8h, v7.8h -; CHECK-NEXT: add x8, sp, #440 +; CHECK-NEXT: add x8, sp, #456 ; CHECK-NEXT: ld1 { v2.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #632 +; CHECK-NEXT: add x9, sp, #648 ; CHECK-NEXT: smull v4.4s, v4.4h, v7.4h -; CHECK-NEXT: ldr b7, [sp, #728] +; CHECK-NEXT: ldr b7, [sp, #744] ; CHECK-NEXT: sshll v20.8h, v20.8b, #0 ; CHECK-NEXT: ld1 { v17.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #712 +; CHECK-NEXT: add x10, sp, #728 ; CHECK-NEXT: ld1 { v16.b }[5], [x8] ; CHECK-NEXT: sshll v7.8h, v7.8b, #0 ; CHECK-NEXT: ld1 { v2.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #640 -; CHECK-NEXT: add x8, sp, #448 +; CHECK-NEXT: add x9, sp, #656 +; CHECK-NEXT: add x8, sp, #464 ; CHECK-NEXT: smull v7.4s, v20.4h, v7.4h ; CHECK-NEXT: ld1 { v17.b }[6], [x10] ; CHECK-NEXT: mov v5.s[1], wzr -; CHECK-NEXT: add x10, sp, #720 +; CHECK-NEXT: add x10, sp, #736 ; CHECK-NEXT: ld1 { v2.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #648 +; CHECK-NEXT: add x9, sp, #664 ; CHECK-NEXT: ld1 { v16.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #456 +; CHECK-NEXT: add x8, sp, #472 ; CHECK-NEXT: mov v7.s[1], wzr ; CHECK-NEXT: ld1 { v17.b }[7], [x10] ; CHECK-NEXT: mov v5.s[2], wzr ; CHECK-NEXT: ld1 { v2.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #656 +; CHECK-NEXT: add x9, sp, #672 ; CHECK-NEXT: ld1 { v16.b }[7], [x8] ; CHECK-NEXT: mov v7.s[2], wzr ; CHECK-NEXT: sshll v3.8h, v3.8b, #0 @@ -1324,6 +1335,7 @@ define i32 @test_sdot_v25i8_double(<25 x i8> %a, <25 x i8> %b, <25 x i8> %c, <25 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %az = sext <25 x i8> %a to <25 x i32> @@ -1341,94 +1353,97 @@ entry: define i32 @test_sdot_v25i8_double_nomla(<25 x i8> %a, <25 x i8> %b, <25 x i8> %c, <25 x i8> %d) { ; CHECK-LABEL: test_sdot_v25i8_double_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr b0, [sp, #64] -; CHECK-NEXT: add x8, sp, #72 -; CHECK-NEXT: ldr b2, [sp] -; CHECK-NEXT: add x9, sp, #8 -; CHECK-NEXT: ldr b3, [sp, #464] -; CHECK-NEXT: add x10, sp, #472 +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ldr b0, [sp, #80] +; CHECK-NEXT: add x8, sp, #88 +; CHECK-NEXT: ldr b2, [sp, #16] +; CHECK-NEXT: add x9, sp, #24 +; CHECK-NEXT: ldr b3, [sp, #480] +; CHECK-NEXT: add x10, sp, #488 ; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #80 +; CHECK-NEXT: add x8, sp, #96 ; CHECK-NEXT: ld1 { v2.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #16 -; CHECK-NEXT: add x11, sp, #104 +; CHECK-NEXT: add x9, sp, #32 +; CHECK-NEXT: add x11, sp, #120 ; CHECK-NEXT: ld1 { v3.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #480 -; CHECK-NEXT: add x12, sp, #32 +; CHECK-NEXT: add x10, sp, #496 +; CHECK-NEXT: add x12, sp, #48 ; CHECK-NEXT: ld1 { v0.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #88 +; CHECK-NEXT: add x8, sp, #104 ; CHECK-NEXT: ld1 { v2.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #24 -; CHECK-NEXT: ldr b4, [sp, #128] +; CHECK-NEXT: add x9, sp, #40 +; CHECK-NEXT: ldr b4, [sp, #144] ; CHECK-NEXT: fmov s1, w0 ; CHECK-NEXT: ld1 { v3.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #48 +; CHECK-NEXT: add x10, sp, #64 ; CHECK-NEXT: ld1 { v0.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #96 +; CHECK-NEXT: add x8, sp, #112 ; CHECK-NEXT: ld1 { v2.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #120 +; CHECK-NEXT: add x9, sp, #136 ; CHECK-NEXT: sshll v4.8h, v4.8b, #0 -; CHECK-NEXT: ldr b6, [sp, #400] +; CHECK-NEXT: ldr b6, [sp, #416] ; CHECK-NEXT: mov v1.b[1], w1 -; CHECK-NEXT: ldr b16, [sp, #528] +; CHECK-NEXT: ldr b16, [sp, #544] ; CHECK-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #112 +; CHECK-NEXT: add x8, sp, #128 ; CHECK-NEXT: ld1 { v2.b }[4], [x12] -; CHECK-NEXT: add x12, sp, #40 +; CHECK-NEXT: add x12, sp, #56 ; CHECK-NEXT: sshll v5.4s, v4.4h, #0 -; CHECK-NEXT: ldr b4, [sp, #336] +; CHECK-NEXT: ldr b4, [sp, #352] ; CHECK-NEXT: mov v1.b[2], w2 ; CHECK-NEXT: ld1 { v0.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #488 +; CHECK-NEXT: add x11, sp, #504 ; CHECK-NEXT: ld1 { v2.b }[5], [x12] ; CHECK-NEXT: sshll v16.8h, v16.8b, #0 ; CHECK-NEXT: ld1 { v3.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #344 +; CHECK-NEXT: add x11, sp, #360 ; CHECK-NEXT: ld1 { v0.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #496 +; CHECK-NEXT: add x8, sp, #512 ; CHECK-NEXT: ld1 { v2.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #352 +; CHECK-NEXT: add x10, sp, #368 ; CHECK-NEXT: ld1 { v4.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #416 +; CHECK-NEXT: add x11, sp, #432 ; CHECK-NEXT: ld1 { v3.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #504 +; CHECK-NEXT: add x8, sp, #520 ; CHECK-NEXT: ld1 { v0.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #408 +; CHECK-NEXT: add x9, sp, #424 ; CHECK-NEXT: mov v1.b[3], w3 ; CHECK-NEXT: ld1 { v4.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #360 +; CHECK-NEXT: add x10, sp, #376 ; CHECK-NEXT: ld1 { v6.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #56 +; CHECK-NEXT: add x9, sp, #72 ; CHECK-NEXT: ld1 { v3.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: add x8, sp, #528 ; CHECK-NEXT: mov v1.b[4], w4 ; CHECK-NEXT: ld1 { v2.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #424 +; CHECK-NEXT: add x9, sp, #440 ; CHECK-NEXT: ld1 { v6.b }[2], [x11] ; CHECK-NEXT: ld1 { v4.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #368 +; CHECK-NEXT: add x10, sp, #384 ; CHECK-NEXT: ld1 { v3.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #376 +; CHECK-NEXT: add x8, sp, #392 ; CHECK-NEXT: mov v1.b[5], w5 ; CHECK-NEXT: ld1 { v6.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #432 +; CHECK-NEXT: add x9, sp, #448 ; CHECK-NEXT: ld1 { v4.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #440 +; CHECK-NEXT: add x10, sp, #456 ; CHECK-NEXT: sshll v16.4s, v16.4h, #0 ; CHECK-NEXT: mov v1.b[6], w6 ; CHECK-NEXT: ld1 { v6.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #520 +; CHECK-NEXT: add x9, sp, #536 ; CHECK-NEXT: ld1 { v4.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #384 +; CHECK-NEXT: add x8, sp, #400 ; CHECK-NEXT: mov v5.s[1], wzr ; CHECK-NEXT: mov v16.s[1], wzr ; CHECK-NEXT: ld1 { v3.b }[7], [x9] ; CHECK-NEXT: ld1 { v6.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #448 +; CHECK-NEXT: add x10, sp, #464 ; CHECK-NEXT: ld1 { v4.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #392 +; CHECK-NEXT: add x8, sp, #408 ; CHECK-NEXT: mov v1.b[7], w7 -; CHECK-NEXT: add x9, sp, #456 +; CHECK-NEXT: add x9, sp, #472 ; CHECK-NEXT: mov v5.s[2], wzr ; CHECK-NEXT: ld1 { v6.b }[6], [x10] ; CHECK-NEXT: mov v16.s[2], wzr @@ -1457,6 +1472,7 @@ define i32 @test_sdot_v25i8_double_nomla(<25 x i8> %a, <25 x i8> %b, <25 x i8> % ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %az = sext <25 x i8> %a to <25 x i32> @@ -1710,272 +1726,275 @@ entry: define i32 @test_sdot_v33i8_double(<33 x i8> %a, <33 x i8> %b, <33 x i8> %c, <33 x i8> %d) { ; CHECK-LABEL: test_sdot_v33i8_double: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr b0, [sp, #64] -; CHECK-NEXT: add x8, sp, #72 -; CHECK-NEXT: ldr b3, [sp] -; CHECK-NEXT: add x9, sp, #136 -; CHECK-NEXT: ldr b2, [sp, #128] -; CHECK-NEXT: add x10, sp, #88 +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ldr b0, [sp, #80] +; CHECK-NEXT: add x8, sp, #88 +; CHECK-NEXT: ldr b3, [sp, #16] +; CHECK-NEXT: add x9, sp, #152 +; CHECK-NEXT: ldr b2, [sp, #144] +; CHECK-NEXT: add x10, sp, #104 ; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #80 -; CHECK-NEXT: ldr b4, [sp, #328] -; CHECK-NEXT: add x11, sp, #352 +; CHECK-NEXT: add x8, sp, #96 +; CHECK-NEXT: ldr b4, [sp, #344] +; CHECK-NEXT: add x11, sp, #368 ; CHECK-NEXT: ld1 { v2.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #144 -; CHECK-NEXT: ldr b7, [sp, #200] +; CHECK-NEXT: add x9, sp, #160 +; CHECK-NEXT: ldr b7, [sp, #216] ; CHECK-NEXT: fmov s1, w0 ; CHECK-NEXT: ld1 { v0.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #8 -; CHECK-NEXT: ldr b17, [sp, #264] +; CHECK-NEXT: add x8, sp, #24 +; CHECK-NEXT: ldr b17, [sp, #280] ; CHECK-NEXT: ld1 { v2.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #152 +; CHECK-NEXT: add x9, sp, #168 ; CHECK-NEXT: ld1 { v3.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #16 +; CHECK-NEXT: add x8, sp, #32 ; CHECK-NEXT: ld1 { v0.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #96 -; CHECK-NEXT: ldr b16, [sp, #392] +; CHECK-NEXT: add x10, sp, #112 +; CHECK-NEXT: ldr b16, [sp, #408] ; CHECK-NEXT: ld1 { v2.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #160 +; CHECK-NEXT: add x9, sp, #176 ; CHECK-NEXT: ld1 { v3.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #24 +; CHECK-NEXT: add x8, sp, #40 ; CHECK-NEXT: ld1 { v0.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #104 -; CHECK-NEXT: ldr b5, [sp, #192] +; CHECK-NEXT: add x10, sp, #120 +; CHECK-NEXT: ldr b5, [sp, #208] ; CHECK-NEXT: ld1 { v2.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #168 +; CHECK-NEXT: add x9, sp, #184 ; CHECK-NEXT: ld1 { v3.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #32 +; CHECK-NEXT: add x8, sp, #48 ; CHECK-NEXT: ld1 { v0.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #112 +; CHECK-NEXT: add x10, sp, #128 ; CHECK-NEXT: sshll v5.8h, v5.8b, #0 -; CHECK-NEXT: ldr b20, [sp, #856] +; CHECK-NEXT: ldr b20, [sp, #872] ; CHECK-NEXT: ld1 { v2.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #176 +; CHECK-NEXT: add x9, sp, #192 ; CHECK-NEXT: ld1 { v3.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #40 +; CHECK-NEXT: add x8, sp, #56 ; CHECK-NEXT: ld1 { v0.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #48 +; CHECK-NEXT: add x10, sp, #64 ; CHECK-NEXT: mov v1.b[1], w1 -; CHECK-NEXT: ldr b21, [sp, #728] +; CHECK-NEXT: ldr b21, [sp, #744] ; CHECK-NEXT: ld1 { v2.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #184 +; CHECK-NEXT: add x9, sp, #200 ; CHECK-NEXT: ld1 { v3.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #120 +; CHECK-NEXT: add x8, sp, #136 ; CHECK-NEXT: mov v1.b[2], w2 ; CHECK-NEXT: ld1 { v0.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #56 +; CHECK-NEXT: add x8, sp, #72 ; CHECK-NEXT: ld1 { v3.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #272 +; CHECK-NEXT: add x10, sp, #288 ; CHECK-NEXT: ld1 { v2.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #208 +; CHECK-NEXT: add x9, sp, #224 ; CHECK-NEXT: mov v1.b[3], w3 ; CHECK-NEXT: ld1 { v17.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #224 +; CHECK-NEXT: add x10, sp, #240 ; CHECK-NEXT: ld1 { v3.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #336 +; CHECK-NEXT: add x8, sp, #352 ; CHECK-NEXT: ld1 { v7.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #400 +; CHECK-NEXT: add x9, sp, #416 ; CHECK-NEXT: sshll v19.8h, v2.8b, #0 -; CHECK-NEXT: ldr b2, [sp, #456] +; CHECK-NEXT: ldr b2, [sp, #472] ; CHECK-NEXT: ld1 { v4.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #344 +; CHECK-NEXT: add x8, sp, #360 ; CHECK-NEXT: ld1 { v16.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #408 +; CHECK-NEXT: add x9, sp, #424 ; CHECK-NEXT: sshll v3.8h, v3.8b, #0 ; CHECK-NEXT: mov v1.b[4], w4 ; CHECK-NEXT: ld1 { v4.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #216 +; CHECK-NEXT: add x8, sp, #232 ; CHECK-NEXT: ld1 { v16.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #416 +; CHECK-NEXT: add x9, sp, #432 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0 ; CHECK-NEXT: ld1 { v7.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #280 +; CHECK-NEXT: add x8, sp, #296 ; CHECK-NEXT: ld1 { v4.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #360 +; CHECK-NEXT: add x11, sp, #376 ; CHECK-NEXT: ld1 { v16.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #424 +; CHECK-NEXT: add x9, sp, #440 ; CHECK-NEXT: ld1 { v17.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #288 +; CHECK-NEXT: add x8, sp, #304 ; CHECK-NEXT: ld1 { v7.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #232 +; CHECK-NEXT: add x10, sp, #248 ; CHECK-NEXT: ld1 { v4.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #368 +; CHECK-NEXT: add x11, sp, #384 ; CHECK-NEXT: ld1 { v16.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #432 +; CHECK-NEXT: add x9, sp, #448 ; CHECK-NEXT: ld1 { v17.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #296 +; CHECK-NEXT: add x8, sp, #312 ; CHECK-NEXT: ld1 { v7.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #240 +; CHECK-NEXT: add x10, sp, #256 ; CHECK-NEXT: ld1 { v4.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #376 +; CHECK-NEXT: add x11, sp, #392 ; CHECK-NEXT: ld1 { v16.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #440 +; CHECK-NEXT: add x9, sp, #456 ; CHECK-NEXT: ld1 { v17.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #304 +; CHECK-NEXT: add x8, sp, #320 ; CHECK-NEXT: ld1 { v7.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #248 +; CHECK-NEXT: add x10, sp, #264 ; CHECK-NEXT: ld1 { v4.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #384 +; CHECK-NEXT: add x11, sp, #400 ; CHECK-NEXT: ld1 { v16.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #448 +; CHECK-NEXT: add x9, sp, #464 ; CHECK-NEXT: ld1 { v17.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #312 +; CHECK-NEXT: add x8, sp, #328 ; CHECK-NEXT: ld1 { v7.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #256 +; CHECK-NEXT: add x10, sp, #272 ; CHECK-NEXT: ld1 { v4.b }[7], [x11] -; CHECK-NEXT: add x11, sp, #480 +; CHECK-NEXT: add x11, sp, #496 ; CHECK-NEXT: ld1 { v16.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #472 +; CHECK-NEXT: add x9, sp, #488 ; CHECK-NEXT: ld1 { v17.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #320 +; CHECK-NEXT: add x8, sp, #336 ; CHECK-NEXT: ld1 { v7.b }[7], [x10] -; CHECK-NEXT: add x10, sp, #624 +; CHECK-NEXT: add x10, sp, #640 ; CHECK-NEXT: sshll v6.8h, v4.8b, #0 ; CHECK-NEXT: sshll v16.8h, v16.8b, #0 ; CHECK-NEXT: ld1 { v17.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #600 +; CHECK-NEXT: add x8, sp, #616 ; CHECK-NEXT: sshll v18.8h, v7.8b, #0 -; CHECK-NEXT: ldr b7, [sp, #592] +; CHECK-NEXT: ldr b7, [sp, #608] ; CHECK-NEXT: mov v1.b[5], w5 ; CHECK-NEXT: ld1 { v7.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #608 +; CHECK-NEXT: add x8, sp, #624 ; CHECK-NEXT: sshll v4.8h, v17.8b, #0 ; CHECK-NEXT: sshll v17.8h, v2.8b, #0 ; CHECK-NEXT: smull2 v2.4s, v3.8h, v4.8h ; CHECK-NEXT: smull v3.4s, v3.4h, v4.4h ; CHECK-NEXT: ld1 { v7.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #616 +; CHECK-NEXT: add x8, sp, #632 ; CHECK-NEXT: smull v4.4s, v5.4h, v17.4h -; CHECK-NEXT: ldr b17, [sp, #528] +; CHECK-NEXT: ldr b17, [sp, #544] ; CHECK-NEXT: smlal2 v2.4s, v19.8h, v16.8h ; CHECK-NEXT: smlal v3.4s, v19.4h, v16.4h -; CHECK-NEXT: ldr b16, [sp, #464] +; CHECK-NEXT: ldr b16, [sp, #480] ; CHECK-NEXT: ld1 { v7.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #536 -; CHECK-NEXT: ldr b19, [sp, #656] +; CHECK-NEXT: add x8, sp, #552 +; CHECK-NEXT: ldr b19, [sp, #672] ; CHECK-NEXT: ld1 { v16.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #664 +; CHECK-NEXT: add x9, sp, #680 ; CHECK-NEXT: ld1 { v17.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #544 +; CHECK-NEXT: add x8, sp, #560 ; CHECK-NEXT: ld1 { v7.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #632 +; CHECK-NEXT: add x10, sp, #648 ; CHECK-NEXT: ld1 { v19.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #672 +; CHECK-NEXT: add x9, sp, #688 ; CHECK-NEXT: ld1 { v16.b }[2], [x11] -; CHECK-NEXT: add x11, sp, #488 +; CHECK-NEXT: add x11, sp, #504 ; CHECK-NEXT: ld1 { v17.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #552 +; CHECK-NEXT: add x8, sp, #568 ; CHECK-NEXT: ld1 { v7.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #640 +; CHECK-NEXT: add x10, sp, #656 ; CHECK-NEXT: ld1 { v19.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #680 +; CHECK-NEXT: add x9, sp, #696 ; CHECK-NEXT: ld1 { v16.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #496 +; CHECK-NEXT: add x11, sp, #512 ; CHECK-NEXT: ld1 { v17.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #560 +; CHECK-NEXT: add x8, sp, #576 ; CHECK-NEXT: mov v4.s[1], wzr ; CHECK-NEXT: ld1 { v7.b }[6], [x10] ; CHECK-NEXT: ld1 { v19.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #688 +; CHECK-NEXT: add x9, sp, #704 ; CHECK-NEXT: ld1 { v16.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #504 +; CHECK-NEXT: add x11, sp, #520 ; CHECK-NEXT: ld1 { v17.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #568 +; CHECK-NEXT: add x8, sp, #584 ; CHECK-NEXT: mov v1.b[6], w6 -; CHECK-NEXT: add x10, sp, #648 +; CHECK-NEXT: add x10, sp, #664 ; CHECK-NEXT: ld1 { v19.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #696 +; CHECK-NEXT: add x9, sp, #712 ; CHECK-NEXT: mov v4.s[2], wzr ; CHECK-NEXT: ld1 { v16.b }[5], [x11] ; CHECK-NEXT: ld1 { v17.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #864 +; CHECK-NEXT: add x8, sp, #880 ; CHECK-NEXT: mov v1.b[7], w7 ; CHECK-NEXT: ld1 { v7.b }[7], [x10] ; CHECK-NEXT: ld1 { v19.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #704 +; CHECK-NEXT: add x9, sp, #720 ; CHECK-NEXT: ld1 { v20.b }[1], [x8] -; CHECK-NEXT: add x10, sp, #512 -; CHECK-NEXT: add x8, sp, #872 -; CHECK-NEXT: add x11, sp, #576 +; CHECK-NEXT: add x10, sp, #528 +; CHECK-NEXT: add x8, sp, #888 +; CHECK-NEXT: add x11, sp, #592 ; CHECK-NEXT: mov v4.s[3], wzr ; CHECK-NEXT: ld1 { v19.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #712 +; CHECK-NEXT: add x9, sp, #728 ; CHECK-NEXT: ld1 { v16.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #520 +; CHECK-NEXT: add x10, sp, #536 ; CHECK-NEXT: ld1 { v20.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #736 +; CHECK-NEXT: add x8, sp, #752 ; CHECK-NEXT: sshll v1.8h, v1.8b, #0 ; CHECK-NEXT: ld1 { v17.b }[6], [x11] ; CHECK-NEXT: ld1 { v19.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #928 +; CHECK-NEXT: add x9, sp, #944 ; CHECK-NEXT: smull2 v5.4s, v1.8h, v18.8h ; CHECK-NEXT: ld1 { v21.b }[1], [x8] ; CHECK-NEXT: smlal v4.4s, v1.4h, v18.4h -; CHECK-NEXT: ldr b1, [sp, #920] -; CHECK-NEXT: add x11, sp, #584 +; CHECK-NEXT: ldr b1, [sp, #936] +; CHECK-NEXT: add x11, sp, #600 ; CHECK-NEXT: ld1 { v16.b }[7], [x10] -; CHECK-NEXT: add x10, sp, #880 -; CHECK-NEXT: add x8, sp, #744 +; CHECK-NEXT: add x10, sp, #896 +; CHECK-NEXT: add x8, sp, #760 ; CHECK-NEXT: ld1 { v1.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #936 +; CHECK-NEXT: add x9, sp, #952 ; CHECK-NEXT: ld1 { v17.b }[7], [x11] -; CHECK-NEXT: add x11, sp, #800 +; CHECK-NEXT: add x11, sp, #816 ; CHECK-NEXT: smlal2 v5.4s, v0.8h, v6.8h ; CHECK-NEXT: ld1 { v20.b }[3], [x10] ; CHECK-NEXT: smlal v4.4s, v0.4h, v6.4h ; CHECK-NEXT: ld1 { v21.b }[2], [x8] -; CHECK-NEXT: ldr b0, [sp, #792] -; CHECK-NEXT: add x10, sp, #888 -; CHECK-NEXT: add x8, sp, #752 +; CHECK-NEXT: ldr b0, [sp, #808] +; CHECK-NEXT: add x10, sp, #904 +; CHECK-NEXT: add x8, sp, #768 ; CHECK-NEXT: ld1 { v1.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #944 -; CHECK-NEXT: ldr b18, [sp, #720] +; CHECK-NEXT: add x9, sp, #960 +; CHECK-NEXT: ldr b18, [sp, #736] ; CHECK-NEXT: ld1 { v0.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #808 +; CHECK-NEXT: add x11, sp, #824 ; CHECK-NEXT: ld1 { v20.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #896 +; CHECK-NEXT: add x10, sp, #912 ; CHECK-NEXT: ld1 { v21.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #760 +; CHECK-NEXT: add x8, sp, #776 ; CHECK-NEXT: ld1 { v1.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #952 +; CHECK-NEXT: add x9, sp, #968 ; CHECK-NEXT: ld1 { v0.b }[2], [x11] -; CHECK-NEXT: add x11, sp, #816 +; CHECK-NEXT: add x11, sp, #832 ; CHECK-NEXT: ld1 { v20.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #904 +; CHECK-NEXT: add x10, sp, #920 ; CHECK-NEXT: ld1 { v21.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #768 +; CHECK-NEXT: add x8, sp, #784 ; CHECK-NEXT: sshll v6.8h, v7.8b, #0 ; CHECK-NEXT: ld1 { v1.b }[4], [x9] ; CHECK-NEXT: sshll v7.8h, v16.8b, #0 ; CHECK-NEXT: ld1 { v0.b }[3], [x11] ; CHECK-NEXT: sshll v16.8h, v18.8b, #0 -; CHECK-NEXT: ldr b18, [sp, #984] +; CHECK-NEXT: ldr b18, [sp, #1000] ; CHECK-NEXT: ld1 { v20.b }[6], [x10] -; CHECK-NEXT: add x9, sp, #960 +; CHECK-NEXT: add x9, sp, #976 ; CHECK-NEXT: ld1 { v21.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #824 +; CHECK-NEXT: add x8, sp, #840 ; CHECK-NEXT: sshll v18.8h, v18.8b, #0 -; CHECK-NEXT: add x10, sp, #912 +; CHECK-NEXT: add x10, sp, #928 ; CHECK-NEXT: ld1 { v1.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #776 +; CHECK-NEXT: add x9, sp, #792 ; CHECK-NEXT: smull v16.4s, v16.4h, v18.4h ; CHECK-NEXT: ld1 { v0.b }[4], [x8] ; CHECK-NEXT: ld1 { v20.b }[7], [x10] -; CHECK-NEXT: add x10, sp, #968 -; CHECK-NEXT: add x8, sp, #832 +; CHECK-NEXT: add x10, sp, #984 +; CHECK-NEXT: add x8, sp, #848 ; CHECK-NEXT: ld1 { v21.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #784 +; CHECK-NEXT: add x9, sp, #800 ; CHECK-NEXT: mov v16.s[1], wzr ; CHECK-NEXT: ld1 { v1.b }[6], [x10] ; CHECK-NEXT: ld1 { v0.b }[5], [x8] -; CHECK-NEXT: add x10, sp, #976 -; CHECK-NEXT: add x8, sp, #840 +; CHECK-NEXT: add x10, sp, #992 +; CHECK-NEXT: add x8, sp, #856 ; CHECK-NEXT: ld1 { v21.b }[7], [x9] ; CHECK-NEXT: sshll v18.8h, v19.8b, #0 ; CHECK-NEXT: mov v16.s[2], wzr ; CHECK-NEXT: ld1 { v1.b }[7], [x10] ; CHECK-NEXT: ld1 { v0.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #848 +; CHECK-NEXT: add x8, sp, #864 ; CHECK-NEXT: sshll v19.8h, v20.8b, #0 ; CHECK-NEXT: sshll v21.8h, v21.8b, #0 ; CHECK-NEXT: mov v16.s[3], wzr @@ -2000,6 +2019,7 @@ define i32 @test_sdot_v33i8_double(<33 x i8> %a, <33 x i8> %b, <33 x i8> %c, <33 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %az = sext <33 x i8> %a to <33 x i32> @@ -2017,135 +2037,138 @@ entry: define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> %c, <33 x i8> %d) { ; CHECK-LABEL: test_sdot_v33i8_double_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr b0, [sp, #64] -; CHECK-NEXT: add x8, sp, #72 -; CHECK-NEXT: ldr b3, [sp, #128] -; CHECK-NEXT: add x10, sp, #80 -; CHECK-NEXT: ldr b5, [sp] -; CHECK-NEXT: add x9, sp, #8 +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ldr b0, [sp, #80] +; CHECK-NEXT: add x8, sp, #88 +; CHECK-NEXT: ldr b3, [sp, #144] +; CHECK-NEXT: add x10, sp, #96 +; CHECK-NEXT: ldr b5, [sp, #16] +; CHECK-NEXT: add x9, sp, #24 ; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #136 +; CHECK-NEXT: add x8, sp, #152 ; CHECK-NEXT: fmov s2, w0 -; CHECK-NEXT: ldr b1, [sp, #192] +; CHECK-NEXT: ldr b1, [sp, #208] ; CHECK-NEXT: ld1 { v5.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #16 +; CHECK-NEXT: add x9, sp, #32 ; CHECK-NEXT: ld1 { v3.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #144 +; CHECK-NEXT: add x8, sp, #160 ; CHECK-NEXT: ld1 { v0.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #88 +; CHECK-NEXT: add x10, sp, #104 ; CHECK-NEXT: mov v2.b[1], w1 -; CHECK-NEXT: ldr b17, [sp, #656] +; CHECK-NEXT: ldr b17, [sp, #672] ; CHECK-NEXT: ld1 { v5.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #24 +; CHECK-NEXT: add x9, sp, #40 ; CHECK-NEXT: ld1 { v3.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #152 +; CHECK-NEXT: add x8, sp, #168 ; CHECK-NEXT: ld1 { v0.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #96 +; CHECK-NEXT: add x10, sp, #112 ; CHECK-NEXT: mov v2.b[2], w2 -; CHECK-NEXT: add x11, sp, #632 +; CHECK-NEXT: add x11, sp, #648 ; CHECK-NEXT: ld1 { v5.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #32 +; CHECK-NEXT: add x9, sp, #48 ; CHECK-NEXT: ld1 { v3.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #160 +; CHECK-NEXT: add x8, sp, #176 ; CHECK-NEXT: ld1 { v0.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #104 +; CHECK-NEXT: add x10, sp, #120 ; CHECK-NEXT: mov v2.b[3], w3 ; CHECK-NEXT: ld1 { v5.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #40 +; CHECK-NEXT: add x9, sp, #56 ; CHECK-NEXT: ld1 { v3.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #168 +; CHECK-NEXT: add x8, sp, #184 ; CHECK-NEXT: ld1 { v0.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #112 +; CHECK-NEXT: add x10, sp, #128 ; CHECK-NEXT: mov v2.b[4], w4 ; CHECK-NEXT: ld1 { v5.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #48 +; CHECK-NEXT: add x9, sp, #64 ; CHECK-NEXT: ld1 { v3.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #176 +; CHECK-NEXT: add x8, sp, #192 ; CHECK-NEXT: ld1 { v0.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #120 +; CHECK-NEXT: add x10, sp, #136 ; CHECK-NEXT: sshll v4.8h, v1.8b, #0 ; CHECK-NEXT: mov v2.b[5], w5 ; CHECK-NEXT: ld1 { v5.b }[6], [x9] ; CHECK-NEXT: ld1 { v3.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #184 +; CHECK-NEXT: add x8, sp, #200 ; CHECK-NEXT: ld1 { v0.b }[7], [x10] -; CHECK-NEXT: add x9, sp, #56 -; CHECK-NEXT: add x10, sp, #664 +; CHECK-NEXT: add x9, sp, #72 +; CHECK-NEXT: add x10, sp, #680 ; CHECK-NEXT: mov v2.b[6], w6 ; CHECK-NEXT: ld1 { v3.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #600 +; CHECK-NEXT: add x8, sp, #616 ; CHECK-NEXT: sshll v1.8h, v0.8b, #0 -; CHECK-NEXT: ldr b0, [sp, #592] +; CHECK-NEXT: ldr b0, [sp, #608] ; CHECK-NEXT: ld1 { v5.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #472 +; CHECK-NEXT: add x9, sp, #488 ; CHECK-NEXT: mov v2.b[7], w7 ; CHECK-NEXT: ld1 { v17.b }[1], [x10] ; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #608 +; CHECK-NEXT: add x8, sp, #624 ; CHECK-NEXT: sshll v7.8h, v3.8b, #0 -; CHECK-NEXT: add x10, sp, #488 +; CHECK-NEXT: add x10, sp, #504 ; CHECK-NEXT: sshll v16.8h, v5.8b, #0 -; CHECK-NEXT: ldr b5, [sp, #464] +; CHECK-NEXT: ldr b5, [sp, #480] ; CHECK-NEXT: sshll v6.8h, v2.8b, #0 ; CHECK-NEXT: ld1 { v0.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #616 +; CHECK-NEXT: add x8, sp, #632 ; CHECK-NEXT: saddl2 v2.4s, v16.8h, v7.8h ; CHECK-NEXT: ld1 { v5.b }[1], [x9] ; CHECK-NEXT: saddl v7.4s, v16.4h, v7.4h -; CHECK-NEXT: ldr b16, [sp, #528] -; CHECK-NEXT: add x9, sp, #536 +; CHECK-NEXT: ldr b16, [sp, #544] +; CHECK-NEXT: add x9, sp, #552 ; CHECK-NEXT: ld1 { v0.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #624 +; CHECK-NEXT: add x8, sp, #640 ; CHECK-NEXT: sshll v4.4s, v4.4h, #0 ; CHECK-NEXT: ld1 { v16.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #544 +; CHECK-NEXT: add x9, sp, #560 ; CHECK-NEXT: mov v4.s[1], wzr ; CHECK-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #480 +; CHECK-NEXT: add x8, sp, #496 ; CHECK-NEXT: saddl2 v3.4s, v6.8h, v1.8h ; CHECK-NEXT: ld1 { v16.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #552 +; CHECK-NEXT: add x9, sp, #568 ; CHECK-NEXT: ld1 { v5.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #672 +; CHECK-NEXT: add x8, sp, #688 ; CHECK-NEXT: mov v4.s[2], wzr ; CHECK-NEXT: ld1 { v0.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #640 +; CHECK-NEXT: add x11, sp, #656 ; CHECK-NEXT: ld1 { v17.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #680 +; CHECK-NEXT: add x8, sp, #696 ; CHECK-NEXT: ld1 { v5.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #496 +; CHECK-NEXT: add x10, sp, #512 ; CHECK-NEXT: mov v4.s[3], wzr ; CHECK-NEXT: ld1 { v16.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #560 +; CHECK-NEXT: add x9, sp, #576 ; CHECK-NEXT: ld1 { v0.b }[6], [x11] ; CHECK-NEXT: ld1 { v17.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #688 +; CHECK-NEXT: add x8, sp, #704 ; CHECK-NEXT: ld1 { v5.b }[4], [x10] -; CHECK-NEXT: add x11, sp, #504 +; CHECK-NEXT: add x11, sp, #520 ; CHECK-NEXT: saddw v4.4s, v4.4s, v6.4h -; CHECK-NEXT: ldr b6, [sp, #720] +; CHECK-NEXT: ldr b6, [sp, #736] ; CHECK-NEXT: ld1 { v16.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #568 +; CHECK-NEXT: add x9, sp, #584 ; CHECK-NEXT: ld1 { v17.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #696 -; CHECK-NEXT: add x10, sp, #648 +; CHECK-NEXT: add x8, sp, #712 +; CHECK-NEXT: add x10, sp, #664 ; CHECK-NEXT: ld1 { v5.b }[5], [x11] ; CHECK-NEXT: saddw v1.4s, v4.4s, v1.4h ; CHECK-NEXT: sshll v4.8h, v6.8b, #0 ; CHECK-NEXT: ld1 { v16.b }[5], [x9] ; CHECK-NEXT: ld1 { v17.b }[5], [x8] -; CHECK-NEXT: add x9, sp, #576 +; CHECK-NEXT: add x9, sp, #592 ; CHECK-NEXT: ld1 { v0.b }[7], [x10] -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: add x10, sp, #704 +; CHECK-NEXT: add x8, sp, #528 +; CHECK-NEXT: add x10, sp, #720 ; CHECK-NEXT: sshll v4.4s, v4.4h, #0 ; CHECK-NEXT: ld1 { v16.b }[6], [x9] ; CHECK-NEXT: ld1 { v5.b }[6], [x8] -; CHECK-NEXT: add x9, sp, #584 +; CHECK-NEXT: add x9, sp, #600 ; CHECK-NEXT: mov v4.s[1], wzr ; CHECK-NEXT: ld1 { v17.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #712 -; CHECK-NEXT: add x8, sp, #520 +; CHECK-NEXT: add x10, sp, #728 +; CHECK-NEXT: add x8, sp, #536 ; CHECK-NEXT: ld1 { v16.b }[7], [x9] ; CHECK-NEXT: add v2.4s, v3.4s, v2.4s ; CHECK-NEXT: mov v4.s[2], wzr @@ -2169,6 +2192,7 @@ define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> % ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %az = sext <33 x i8> %a to <33 x i32> @@ -2255,373 +2279,376 @@ entry: define i32 @test_sdot_v48i8_double(<48 x i8> %a, <48 x i8> %b, <48 x i8> %c, <48 x i8> %d) { ; CHECK-LABEL: test_sdot_v48i8_double: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr b2, [sp, #576] -; CHECK-NEXT: add x8, sp, #584 -; CHECK-NEXT: ldr b3, [sp, #192] -; CHECK-NEXT: add x10, sp, #328 -; CHECK-NEXT: ldr b0, [sp, #320] -; CHECK-NEXT: add x9, sp, #592 +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ldr b2, [sp, #592] +; CHECK-NEXT: add x8, sp, #600 +; CHECK-NEXT: ldr b3, [sp, #208] +; CHECK-NEXT: add x10, sp, #344 +; CHECK-NEXT: ldr b0, [sp, #336] +; CHECK-NEXT: add x9, sp, #608 ; CHECK-NEXT: ld1 { v2.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #200 +; CHECK-NEXT: add x8, sp, #216 ; CHECK-NEXT: fmov s1, w0 -; CHECK-NEXT: add x11, sp, #648 +; CHECK-NEXT: add x11, sp, #664 ; CHECK-NEXT: ld1 { v0.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #336 +; CHECK-NEXT: add x10, sp, #352 ; CHECK-NEXT: ld1 { v3.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #208 +; CHECK-NEXT: add x8, sp, #224 ; CHECK-NEXT: ld1 { v2.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #600 +; CHECK-NEXT: add x9, sp, #616 ; CHECK-NEXT: mov v1.b[1], w1 -; CHECK-NEXT: ldr b18, [sp, #1344] +; CHECK-NEXT: ldr b18, [sp, #1360] ; CHECK-NEXT: ld1 { v0.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #224 +; CHECK-NEXT: add x10, sp, #240 ; CHECK-NEXT: ld1 { v3.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #216 +; CHECK-NEXT: add x8, sp, #232 ; CHECK-NEXT: ld1 { v2.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #608 +; CHECK-NEXT: add x9, sp, #624 ; CHECK-NEXT: mov v1.b[2], w2 -; CHECK-NEXT: ldr b19, [sp, #960] -; CHECK-NEXT: ldr b4, [sp, #448] +; CHECK-NEXT: ldr b19, [sp, #976] +; CHECK-NEXT: ldr b4, [sp, #464] ; CHECK-NEXT: ld1 { v3.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #344 +; CHECK-NEXT: add x8, sp, #360 ; CHECK-NEXT: ld1 { v2.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #616 +; CHECK-NEXT: add x9, sp, #632 ; CHECK-NEXT: mov v1.b[3], w3 -; CHECK-NEXT: ldr b5, [sp, #64] +; CHECK-NEXT: ldr b5, [sp, #80] ; CHECK-NEXT: ld1 { v0.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #352 +; CHECK-NEXT: add x8, sp, #368 ; CHECK-NEXT: ld1 { v3.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #232 +; CHECK-NEXT: add x10, sp, #248 ; CHECK-NEXT: ld1 { v2.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #360 +; CHECK-NEXT: add x9, sp, #376 ; CHECK-NEXT: mov v1.b[4], w4 -; CHECK-NEXT: ldr b16, [sp, #1088] +; CHECK-NEXT: ldr b16, [sp, #1104] ; CHECK-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #624 +; CHECK-NEXT: add x8, sp, #640 ; CHECK-NEXT: ld1 { v3.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #240 +; CHECK-NEXT: add x10, sp, #256 ; CHECK-NEXT: movi v7.2d, #0000000000000000 -; CHECK-NEXT: ldr b17, [sp, #704] +; CHECK-NEXT: ldr b17, [sp, #720] ; CHECK-NEXT: ld1 { v2.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #368 +; CHECK-NEXT: add x8, sp, #384 ; CHECK-NEXT: ld1 { v0.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #632 +; CHECK-NEXT: add x9, sp, #648 ; CHECK-NEXT: mov v1.b[5], w5 ; CHECK-NEXT: ld1 { v3.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #376 +; CHECK-NEXT: add x10, sp, #392 ; CHECK-NEXT: ld1 { v2.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #640 +; CHECK-NEXT: add x9, sp, #656 ; CHECK-NEXT: ld1 { v0.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #248 +; CHECK-NEXT: add x8, sp, #264 ; CHECK-NEXT: mov v1.b[6], w6 ; CHECK-NEXT: movi v6.2d, #0000000000000000 ; CHECK-NEXT: ld1 { v3.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #256 +; CHECK-NEXT: add x8, sp, #272 ; CHECK-NEXT: ld1 { v0.b }[7], [x10] -; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: add x10, sp, #16 ; CHECK-NEXT: mov v1.b[7], w7 ; CHECK-NEXT: ld1 { v2.b }[8], [x9] -; CHECK-NEXT: add x9, sp, #384 +; CHECK-NEXT: add x9, sp, #400 ; CHECK-NEXT: ld1 { v3.b }[8], [x8] -; CHECK-NEXT: add x8, sp, #264 +; CHECK-NEXT: add x8, sp, #280 ; CHECK-NEXT: ld1 { v1.b }[8], [x10] -; CHECK-NEXT: add x10, sp, #8 +; CHECK-NEXT: add x10, sp, #24 ; CHECK-NEXT: ld1 { v0.b }[8], [x9] -; CHECK-NEXT: add x9, sp, #392 +; CHECK-NEXT: add x9, sp, #408 ; CHECK-NEXT: ld1 { v3.b }[9], [x8] -; CHECK-NEXT: add x8, sp, #272 +; CHECK-NEXT: add x8, sp, #288 ; CHECK-NEXT: ld1 { v2.b }[9], [x11] -; CHECK-NEXT: add x11, sp, #656 +; CHECK-NEXT: add x11, sp, #672 ; CHECK-NEXT: ld1 { v1.b }[9], [x10] -; CHECK-NEXT: add x10, sp, #16 +; CHECK-NEXT: add x10, sp, #32 ; CHECK-NEXT: ld1 { v0.b }[9], [x9] -; CHECK-NEXT: add x9, sp, #400 +; CHECK-NEXT: add x9, sp, #416 ; CHECK-NEXT: ld1 { v3.b }[10], [x8] -; CHECK-NEXT: add x8, sp, #280 +; CHECK-NEXT: add x8, sp, #296 ; CHECK-NEXT: ld1 { v2.b }[10], [x11] -; CHECK-NEXT: add x11, sp, #664 +; CHECK-NEXT: add x11, sp, #680 ; CHECK-NEXT: ld1 { v1.b }[10], [x10] -; CHECK-NEXT: add x10, sp, #24 +; CHECK-NEXT: add x10, sp, #40 ; CHECK-NEXT: ld1 { v0.b }[10], [x9] -; CHECK-NEXT: add x9, sp, #408 +; CHECK-NEXT: add x9, sp, #424 ; CHECK-NEXT: ld1 { v3.b }[11], [x8] -; CHECK-NEXT: add x8, sp, #288 +; CHECK-NEXT: add x8, sp, #304 ; CHECK-NEXT: ld1 { v2.b }[11], [x11] -; CHECK-NEXT: add x11, sp, #672 +; CHECK-NEXT: add x11, sp, #688 ; CHECK-NEXT: ld1 { v1.b }[11], [x10] -; CHECK-NEXT: add x10, sp, #32 +; CHECK-NEXT: add x10, sp, #48 ; CHECK-NEXT: ld1 { v0.b }[11], [x9] -; CHECK-NEXT: add x9, sp, #416 +; CHECK-NEXT: add x9, sp, #432 ; CHECK-NEXT: ld1 { v3.b }[12], [x8] -; CHECK-NEXT: add x8, sp, #296 +; CHECK-NEXT: add x8, sp, #312 ; CHECK-NEXT: ld1 { v2.b }[12], [x11] -; CHECK-NEXT: add x11, sp, #680 +; CHECK-NEXT: add x11, sp, #696 ; CHECK-NEXT: ld1 { v1.b }[12], [x10] -; CHECK-NEXT: add x10, sp, #40 +; CHECK-NEXT: add x10, sp, #56 ; CHECK-NEXT: ld1 { v0.b }[12], [x9] -; CHECK-NEXT: add x9, sp, #424 +; CHECK-NEXT: add x9, sp, #440 ; CHECK-NEXT: ld1 { v3.b }[13], [x8] -; CHECK-NEXT: add x8, sp, #304 +; CHECK-NEXT: add x8, sp, #320 ; CHECK-NEXT: ld1 { v2.b }[13], [x11] -; CHECK-NEXT: add x11, sp, #688 +; CHECK-NEXT: add x11, sp, #704 ; CHECK-NEXT: ld1 { v1.b }[13], [x10] -; CHECK-NEXT: add x10, sp, #48 +; CHECK-NEXT: add x10, sp, #64 ; CHECK-NEXT: ld1 { v0.b }[13], [x9] -; CHECK-NEXT: add x9, sp, #432 +; CHECK-NEXT: add x9, sp, #448 ; CHECK-NEXT: ld1 { v3.b }[14], [x8] -; CHECK-NEXT: add x8, sp, #312 +; CHECK-NEXT: add x8, sp, #328 ; CHECK-NEXT: ld1 { v2.b }[14], [x11] -; CHECK-NEXT: add x11, sp, #696 +; CHECK-NEXT: add x11, sp, #712 ; CHECK-NEXT: ld1 { v1.b }[14], [x10] -; CHECK-NEXT: add x10, sp, #456 +; CHECK-NEXT: add x10, sp, #472 ; CHECK-NEXT: ld1 { v0.b }[14], [x9] -; CHECK-NEXT: add x9, sp, #440 +; CHECK-NEXT: add x9, sp, #456 ; CHECK-NEXT: ld1 { v3.b }[15], [x8] -; CHECK-NEXT: add x8, sp, #56 +; CHECK-NEXT: add x8, sp, #72 ; CHECK-NEXT: ld1 { v4.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #72 +; CHECK-NEXT: add x10, sp, #88 ; CHECK-NEXT: ld1 { v2.b }[15], [x11] -; CHECK-NEXT: add x11, sp, #464 +; CHECK-NEXT: add x11, sp, #480 ; CHECK-NEXT: ld1 { v1.b }[15], [x8] -; CHECK-NEXT: add x8, sp, #1352 +; CHECK-NEXT: add x8, sp, #1368 ; CHECK-NEXT: ld1 { v0.b }[15], [x9] -; CHECK-NEXT: add x9, sp, #968 +; CHECK-NEXT: add x9, sp, #984 ; CHECK-NEXT: ld1 { v5.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #80 +; CHECK-NEXT: add x10, sp, #96 ; CHECK-NEXT: ld1 { v18.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #1360 +; CHECK-NEXT: add x8, sp, #1376 ; CHECK-NEXT: ld1 { v19.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #976 +; CHECK-NEXT: add x9, sp, #992 ; CHECK-NEXT: ld1 { v4.b }[2], [x11] -; CHECK-NEXT: add x11, sp, #472 +; CHECK-NEXT: add x11, sp, #488 ; CHECK-NEXT: ld1 { v5.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #88 +; CHECK-NEXT: add x10, sp, #104 ; CHECK-NEXT: ld1 { v18.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #1368 +; CHECK-NEXT: add x8, sp, #1384 ; CHECK-NEXT: ld1 { v19.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #984 +; CHECK-NEXT: add x9, sp, #1000 ; CHECK-NEXT: ld1 { v4.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #480 +; CHECK-NEXT: add x11, sp, #496 ; CHECK-NEXT: ld1 { v5.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #96 +; CHECK-NEXT: add x10, sp, #112 ; CHECK-NEXT: ld1 { v18.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #1376 +; CHECK-NEXT: add x8, sp, #1392 ; CHECK-NEXT: ld1 { v19.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #992 +; CHECK-NEXT: add x9, sp, #1008 ; CHECK-NEXT: ld1 { v4.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #488 +; CHECK-NEXT: add x11, sp, #504 ; CHECK-NEXT: ld1 { v5.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #104 +; CHECK-NEXT: add x10, sp, #120 ; CHECK-NEXT: ld1 { v18.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #1384 +; CHECK-NEXT: add x8, sp, #1400 ; CHECK-NEXT: ld1 { v19.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #1000 +; CHECK-NEXT: add x9, sp, #1016 ; CHECK-NEXT: ld1 { v4.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #496 +; CHECK-NEXT: add x11, sp, #512 ; CHECK-NEXT: ld1 { v5.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #112 +; CHECK-NEXT: add x10, sp, #128 ; CHECK-NEXT: ld1 { v18.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #1392 +; CHECK-NEXT: add x8, sp, #1408 ; CHECK-NEXT: ld1 { v19.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #1008 +; CHECK-NEXT: add x9, sp, #1024 ; CHECK-NEXT: ld1 { v4.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #504 +; CHECK-NEXT: add x11, sp, #520 ; CHECK-NEXT: ld1 { v5.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #120 +; CHECK-NEXT: add x10, sp, #136 ; CHECK-NEXT: ld1 { v18.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #1400 +; CHECK-NEXT: add x8, sp, #1416 ; CHECK-NEXT: ld1 { v19.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #1016 +; CHECK-NEXT: add x9, sp, #1032 ; CHECK-NEXT: ld1 { v4.b }[7], [x11] -; CHECK-NEXT: add x11, sp, #512 +; CHECK-NEXT: add x11, sp, #528 ; CHECK-NEXT: ld1 { v5.b }[7], [x10] -; CHECK-NEXT: add x10, sp, #128 +; CHECK-NEXT: add x10, sp, #144 ; CHECK-NEXT: ld1 { v18.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #1408 +; CHECK-NEXT: add x8, sp, #1424 ; CHECK-NEXT: ld1 { v19.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #1024 +; CHECK-NEXT: add x9, sp, #1040 ; CHECK-NEXT: ld1 { v4.b }[8], [x11] -; CHECK-NEXT: add x11, sp, #520 +; CHECK-NEXT: add x11, sp, #536 ; CHECK-NEXT: ld1 { v5.b }[8], [x10] -; CHECK-NEXT: add x10, sp, #136 +; CHECK-NEXT: add x10, sp, #152 ; CHECK-NEXT: ld1 { v18.b }[8], [x8] -; CHECK-NEXT: add x8, sp, #1416 +; CHECK-NEXT: add x8, sp, #1432 ; CHECK-NEXT: ld1 { v19.b }[8], [x9] -; CHECK-NEXT: add x9, sp, #1032 +; CHECK-NEXT: add x9, sp, #1048 ; CHECK-NEXT: ld1 { v4.b }[9], [x11] -; CHECK-NEXT: add x11, sp, #528 +; CHECK-NEXT: add x11, sp, #544 ; CHECK-NEXT: ld1 { v5.b }[9], [x10] -; CHECK-NEXT: add x10, sp, #144 +; CHECK-NEXT: add x10, sp, #160 ; CHECK-NEXT: ld1 { v18.b }[9], [x8] -; CHECK-NEXT: add x8, sp, #1424 +; CHECK-NEXT: add x8, sp, #1440 ; CHECK-NEXT: ld1 { v19.b }[9], [x9] -; CHECK-NEXT: add x9, sp, #1040 +; CHECK-NEXT: add x9, sp, #1056 ; CHECK-NEXT: ld1 { v4.b }[10], [x11] -; CHECK-NEXT: add x11, sp, #536 +; CHECK-NEXT: add x11, sp, #552 ; CHECK-NEXT: ld1 { v5.b }[10], [x10] -; CHECK-NEXT: add x10, sp, #152 +; CHECK-NEXT: add x10, sp, #168 ; CHECK-NEXT: ld1 { v18.b }[10], [x8] -; CHECK-NEXT: add x8, sp, #1432 +; CHECK-NEXT: add x8, sp, #1448 ; CHECK-NEXT: ld1 { v19.b }[10], [x9] -; CHECK-NEXT: add x9, sp, #1048 +; CHECK-NEXT: add x9, sp, #1064 ; CHECK-NEXT: ld1 { v4.b }[11], [x11] -; CHECK-NEXT: add x11, sp, #544 +; CHECK-NEXT: add x11, sp, #560 ; CHECK-NEXT: ld1 { v5.b }[11], [x10] -; CHECK-NEXT: add x10, sp, #160 +; CHECK-NEXT: add x10, sp, #176 ; CHECK-NEXT: ld1 { v18.b }[11], [x8] -; CHECK-NEXT: add x8, sp, #1440 +; CHECK-NEXT: add x8, sp, #1456 ; CHECK-NEXT: ld1 { v19.b }[11], [x9] -; CHECK-NEXT: add x9, sp, #1056 +; CHECK-NEXT: add x9, sp, #1072 ; CHECK-NEXT: ld1 { v4.b }[12], [x11] -; CHECK-NEXT: add x11, sp, #552 +; CHECK-NEXT: add x11, sp, #568 ; CHECK-NEXT: ld1 { v5.b }[12], [x10] -; CHECK-NEXT: add x10, sp, #168 +; CHECK-NEXT: add x10, sp, #184 ; CHECK-NEXT: ld1 { v18.b }[12], [x8] -; CHECK-NEXT: add x8, sp, #1448 +; CHECK-NEXT: add x8, sp, #1464 ; CHECK-NEXT: ld1 { v19.b }[12], [x9] -; CHECK-NEXT: add x9, sp, #1064 +; CHECK-NEXT: add x9, sp, #1080 ; CHECK-NEXT: ld1 { v4.b }[13], [x11] -; CHECK-NEXT: add x11, sp, #1112 +; CHECK-NEXT: add x11, sp, #1128 ; CHECK-NEXT: ld1 { v5.b }[13], [x10] -; CHECK-NEXT: add x10, sp, #1096 +; CHECK-NEXT: add x10, sp, #1112 ; CHECK-NEXT: ld1 { v18.b }[13], [x8] -; CHECK-NEXT: add x8, sp, #1456 +; CHECK-NEXT: add x8, sp, #1472 ; CHECK-NEXT: ld1 { v19.b }[13], [x9] -; CHECK-NEXT: add x9, sp, #1072 +; CHECK-NEXT: add x9, sp, #1088 ; CHECK-NEXT: ld1 { v16.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #712 +; CHECK-NEXT: add x10, sp, #728 ; CHECK-NEXT: sdot v7.4s, v3.16b, v2.16b -; CHECK-NEXT: ldr b2, [sp, #1216] +; CHECK-NEXT: ldr b2, [sp, #1232] ; CHECK-NEXT: ld1 { v18.b }[14], [x8] -; CHECK-NEXT: add x8, sp, #1464 +; CHECK-NEXT: add x8, sp, #1480 ; CHECK-NEXT: ld1 { v19.b }[14], [x9] -; CHECK-NEXT: add x9, sp, #1080 +; CHECK-NEXT: add x9, sp, #1096 ; CHECK-NEXT: ld1 { v17.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #840 -; CHECK-NEXT: ldr b3, [sp, #832] +; CHECK-NEXT: add x10, sp, #856 +; CHECK-NEXT: ldr b3, [sp, #848] ; CHECK-NEXT: ld1 { v18.b }[15], [x8] -; CHECK-NEXT: add x8, sp, #560 +; CHECK-NEXT: add x8, sp, #576 ; CHECK-NEXT: ld1 { v19.b }[15], [x9] -; CHECK-NEXT: add x9, sp, #176 +; CHECK-NEXT: add x9, sp, #192 ; CHECK-NEXT: ld1 { v3.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #848 +; CHECK-NEXT: add x10, sp, #864 ; CHECK-NEXT: ld1 { v4.b }[14], [x8] -; CHECK-NEXT: add x8, sp, #1104 +; CHECK-NEXT: add x8, sp, #1120 ; CHECK-NEXT: ld1 { v5.b }[14], [x9] -; CHECK-NEXT: add x9, sp, #1224 +; CHECK-NEXT: add x9, sp, #1240 ; CHECK-NEXT: sdot v6.4s, v19.16b, v18.16b ; CHECK-NEXT: ld1 { v16.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #720 +; CHECK-NEXT: add x8, sp, #736 ; CHECK-NEXT: ld1 { v2.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #1232 +; CHECK-NEXT: add x9, sp, #1248 ; CHECK-NEXT: ld1 { v3.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #856 +; CHECK-NEXT: add x10, sp, #872 ; CHECK-NEXT: ld1 { v17.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #728 +; CHECK-NEXT: add x8, sp, #744 ; CHECK-NEXT: ld1 { v16.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #1120 +; CHECK-NEXT: add x11, sp, #1136 ; CHECK-NEXT: ld1 { v2.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #1240 +; CHECK-NEXT: add x9, sp, #1256 ; CHECK-NEXT: ld1 { v3.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #864 +; CHECK-NEXT: add x10, sp, #880 ; CHECK-NEXT: ld1 { v17.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #736 +; CHECK-NEXT: add x8, sp, #752 ; CHECK-NEXT: ld1 { v16.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #1128 +; CHECK-NEXT: add x11, sp, #1144 ; CHECK-NEXT: ld1 { v2.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #1248 +; CHECK-NEXT: add x9, sp, #1264 ; CHECK-NEXT: ld1 { v3.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #872 +; CHECK-NEXT: add x10, sp, #888 ; CHECK-NEXT: ld1 { v17.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #744 +; CHECK-NEXT: add x8, sp, #760 ; CHECK-NEXT: ld1 { v16.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #1136 +; CHECK-NEXT: add x11, sp, #1152 ; CHECK-NEXT: ld1 { v2.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #1256 +; CHECK-NEXT: add x9, sp, #1272 ; CHECK-NEXT: ld1 { v3.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #880 +; CHECK-NEXT: add x10, sp, #896 ; CHECK-NEXT: ld1 { v17.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #752 +; CHECK-NEXT: add x8, sp, #768 ; CHECK-NEXT: ld1 { v16.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #1144 +; CHECK-NEXT: add x11, sp, #1160 ; CHECK-NEXT: ld1 { v2.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #1264 +; CHECK-NEXT: add x9, sp, #1280 ; CHECK-NEXT: ld1 { v3.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #888 +; CHECK-NEXT: add x10, sp, #904 ; CHECK-NEXT: ld1 { v17.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #760 +; CHECK-NEXT: add x8, sp, #776 ; CHECK-NEXT: ld1 { v16.b }[7], [x11] -; CHECK-NEXT: add x11, sp, #1152 +; CHECK-NEXT: add x11, sp, #1168 ; CHECK-NEXT: ld1 { v2.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #1272 +; CHECK-NEXT: add x9, sp, #1288 ; CHECK-NEXT: ld1 { v3.b }[7], [x10] -; CHECK-NEXT: add x10, sp, #896 +; CHECK-NEXT: add x10, sp, #912 ; CHECK-NEXT: ld1 { v17.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #768 +; CHECK-NEXT: add x8, sp, #784 ; CHECK-NEXT: ld1 { v16.b }[8], [x11] -; CHECK-NEXT: add x11, sp, #1160 +; CHECK-NEXT: add x11, sp, #1176 ; CHECK-NEXT: ld1 { v2.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #1280 +; CHECK-NEXT: add x9, sp, #1296 ; CHECK-NEXT: ld1 { v3.b }[8], [x10] -; CHECK-NEXT: add x10, sp, #904 +; CHECK-NEXT: add x10, sp, #920 ; CHECK-NEXT: ld1 { v17.b }[8], [x8] -; CHECK-NEXT: add x8, sp, #776 +; CHECK-NEXT: add x8, sp, #792 ; CHECK-NEXT: ld1 { v16.b }[9], [x11] -; CHECK-NEXT: add x11, sp, #1168 +; CHECK-NEXT: add x11, sp, #1184 ; CHECK-NEXT: ld1 { v2.b }[8], [x9] -; CHECK-NEXT: add x9, sp, #1288 +; CHECK-NEXT: add x9, sp, #1304 ; CHECK-NEXT: ld1 { v3.b }[9], [x10] -; CHECK-NEXT: add x10, sp, #912 +; CHECK-NEXT: add x10, sp, #928 ; CHECK-NEXT: ld1 { v17.b }[9], [x8] -; CHECK-NEXT: add x8, sp, #784 +; CHECK-NEXT: add x8, sp, #800 ; CHECK-NEXT: ld1 { v16.b }[10], [x11] -; CHECK-NEXT: add x11, sp, #1176 +; CHECK-NEXT: add x11, sp, #1192 ; CHECK-NEXT: ld1 { v2.b }[9], [x9] -; CHECK-NEXT: add x9, sp, #1296 +; CHECK-NEXT: add x9, sp, #1312 ; CHECK-NEXT: ld1 { v3.b }[10], [x10] -; CHECK-NEXT: add x10, sp, #920 +; CHECK-NEXT: add x10, sp, #936 ; CHECK-NEXT: ld1 { v17.b }[10], [x8] -; CHECK-NEXT: add x8, sp, #792 +; CHECK-NEXT: add x8, sp, #808 ; CHECK-NEXT: ld1 { v16.b }[11], [x11] -; CHECK-NEXT: add x11, sp, #1184 +; CHECK-NEXT: add x11, sp, #1200 ; CHECK-NEXT: ld1 { v2.b }[10], [x9] -; CHECK-NEXT: add x9, sp, #1304 +; CHECK-NEXT: add x9, sp, #1320 ; CHECK-NEXT: ld1 { v3.b }[11], [x10] -; CHECK-NEXT: add x10, sp, #928 +; CHECK-NEXT: add x10, sp, #944 ; CHECK-NEXT: ld1 { v17.b }[11], [x8] -; CHECK-NEXT: add x8, sp, #800 +; CHECK-NEXT: add x8, sp, #816 ; CHECK-NEXT: ld1 { v16.b }[12], [x11] -; CHECK-NEXT: add x11, sp, #1192 +; CHECK-NEXT: add x11, sp, #1208 ; CHECK-NEXT: ld1 { v2.b }[11], [x9] -; CHECK-NEXT: add x9, sp, #1312 +; CHECK-NEXT: add x9, sp, #1328 ; CHECK-NEXT: ld1 { v3.b }[12], [x10] -; CHECK-NEXT: add x10, sp, #936 +; CHECK-NEXT: add x10, sp, #952 ; CHECK-NEXT: ld1 { v17.b }[12], [x8] -; CHECK-NEXT: add x8, sp, #808 +; CHECK-NEXT: add x8, sp, #824 ; CHECK-NEXT: ld1 { v16.b }[13], [x11] -; CHECK-NEXT: add x11, sp, #1200 +; CHECK-NEXT: add x11, sp, #1216 ; CHECK-NEXT: ld1 { v2.b }[12], [x9] -; CHECK-NEXT: add x9, sp, #1320 +; CHECK-NEXT: add x9, sp, #1336 ; CHECK-NEXT: ld1 { v3.b }[13], [x10] -; CHECK-NEXT: add x10, sp, #944 +; CHECK-NEXT: add x10, sp, #960 ; CHECK-NEXT: ld1 { v17.b }[13], [x8] -; CHECK-NEXT: add x8, sp, #816 +; CHECK-NEXT: add x8, sp, #832 ; CHECK-NEXT: ld1 { v16.b }[14], [x11] -; CHECK-NEXT: add x11, sp, #1208 +; CHECK-NEXT: add x11, sp, #1224 ; CHECK-NEXT: ld1 { v2.b }[13], [x9] -; CHECK-NEXT: add x9, sp, #1328 +; CHECK-NEXT: add x9, sp, #1344 ; CHECK-NEXT: ld1 { v3.b }[14], [x10] -; CHECK-NEXT: add x10, sp, #952 +; CHECK-NEXT: add x10, sp, #968 ; CHECK-NEXT: ld1 { v17.b }[14], [x8] -; CHECK-NEXT: add x8, sp, #824 +; CHECK-NEXT: add x8, sp, #840 ; CHECK-NEXT: ld1 { v16.b }[15], [x11] -; CHECK-NEXT: add x11, sp, #568 +; CHECK-NEXT: add x11, sp, #584 ; CHECK-NEXT: ld1 { v2.b }[14], [x9] -; CHECK-NEXT: add x9, sp, #1336 +; CHECK-NEXT: add x9, sp, #1352 ; CHECK-NEXT: sdot v7.4s, v1.16b, v0.16b ; CHECK-NEXT: ld1 { v3.b }[15], [x10] ; CHECK-NEXT: ld1 { v17.b }[15], [x8] -; CHECK-NEXT: add x8, sp, #184 +; CHECK-NEXT: add x8, sp, #200 ; CHECK-NEXT: ld1 { v4.b }[15], [x11] ; CHECK-NEXT: ld1 { v2.b }[15], [x9] ; CHECK-NEXT: ld1 { v5.b }[15], [x8] @@ -2631,6 +2658,7 @@ define i32 @test_sdot_v48i8_double(<48 x i8> %a, <48 x i8> %b, <48 x i8> %c, <48 ; CHECK-NEXT: add v0.4s, v7.4s, v6.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %az = sext <48 x i8> %a to <48 x i32> @@ -2648,188 +2676,191 @@ entry: define i32 @test_sdot_v48i8_double_nomla(<48 x i8> %a, <48 x i8> %b, <48 x i8> %c, <48 x i8> %d) { ; CHECK-LABEL: test_sdot_v48i8_double_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr b0, [sp, #192] -; CHECK-NEXT: add x8, sp, #200 +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ldr b0, [sp, #208] +; CHECK-NEXT: add x8, sp, #216 ; CHECK-NEXT: fmov s1, w0 -; CHECK-NEXT: add x9, sp, #216 -; CHECK-NEXT: ldr b2, [sp, #64] -; CHECK-NEXT: add x11, sp, #72 +; CHECK-NEXT: add x9, sp, #232 +; CHECK-NEXT: ldr b2, [sp, #80] +; CHECK-NEXT: add x11, sp, #88 ; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #208 +; CHECK-NEXT: add x8, sp, #224 ; CHECK-NEXT: mov v1.b[1], w1 -; CHECK-NEXT: add x10, sp, #232 +; CHECK-NEXT: add x10, sp, #248 ; CHECK-NEXT: ld1 { v2.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #712 -; CHECK-NEXT: ldr b4, [sp, #704] -; CHECK-NEXT: add x12, sp, #968 +; CHECK-NEXT: add x11, sp, #728 +; CHECK-NEXT: ldr b4, [sp, #720] +; CHECK-NEXT: add x12, sp, #984 ; CHECK-NEXT: ld1 { v0.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #224 +; CHECK-NEXT: add x8, sp, #240 ; CHECK-NEXT: mov v1.b[2], w2 -; CHECK-NEXT: ldr b3, [sp, #960] -; CHECK-NEXT: ldr b5, [sp, #832] -; CHECK-NEXT: add x13, sp, #80 +; CHECK-NEXT: ldr b3, [sp, #976] +; CHECK-NEXT: ldr b5, [sp, #848] +; CHECK-NEXT: add x13, sp, #96 ; CHECK-NEXT: ld1 { v4.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #840 +; CHECK-NEXT: add x11, sp, #856 ; CHECK-NEXT: ld1 { v0.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #240 +; CHECK-NEXT: add x9, sp, #256 ; CHECK-NEXT: mov v1.b[3], w3 ; CHECK-NEXT: ld1 { v3.b }[1], [x12] -; CHECK-NEXT: add x12, sp, #248 +; CHECK-NEXT: add x12, sp, #264 ; CHECK-NEXT: ld1 { v5.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #976 +; CHECK-NEXT: add x11, sp, #992 ; CHECK-NEXT: ld1 { v2.b }[2], [x13] ; CHECK-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-NEXT: add x13, sp, #720 +; CHECK-NEXT: add x13, sp, #736 ; CHECK-NEXT: mov v1.b[4], w4 -; CHECK-NEXT: add x8, sp, #256 +; CHECK-NEXT: add x8, sp, #272 ; CHECK-NEXT: ld1 { v3.b }[2], [x11] -; CHECK-NEXT: add x14, sp, #848 +; CHECK-NEXT: add x14, sp, #864 ; CHECK-NEXT: ld1 { v4.b }[2], [x13] -; CHECK-NEXT: add x13, sp, #984 +; CHECK-NEXT: add x13, sp, #1000 ; CHECK-NEXT: ld1 { v0.b }[5], [x10] -; CHECK-NEXT: add x11, sp, #88 +; CHECK-NEXT: add x11, sp, #104 ; CHECK-NEXT: mov v1.b[5], w5 -; CHECK-NEXT: add x10, sp, #264 +; CHECK-NEXT: add x10, sp, #280 ; CHECK-NEXT: ld1 { v3.b }[3], [x13] -; CHECK-NEXT: mov x13, sp +; CHECK-NEXT: add x13, sp, #16 ; CHECK-NEXT: ld1 { v5.b }[2], [x14] -; CHECK-NEXT: add x14, sp, #280 +; CHECK-NEXT: add x14, sp, #296 ; CHECK-NEXT: ld1 { v0.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #272 +; CHECK-NEXT: add x9, sp, #288 ; CHECK-NEXT: mov v1.b[6], w6 ; CHECK-NEXT: ld1 { v2.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #856 +; CHECK-NEXT: add x11, sp, #872 ; CHECK-NEXT: movi v6.16b, #1 ; CHECK-NEXT: ld1 { v0.b }[7], [x12] -; CHECK-NEXT: add x12, sp, #728 +; CHECK-NEXT: add x12, sp, #744 ; CHECK-NEXT: mov v1.b[7], w7 ; CHECK-NEXT: ld1 { v5.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #992 +; CHECK-NEXT: add x11, sp, #1008 ; CHECK-NEXT: ld1 { v4.b }[3], [x12] -; CHECK-NEXT: add x12, sp, #96 +; CHECK-NEXT: add x12, sp, #112 ; CHECK-NEXT: ld1 { v0.b }[8], [x8] -; CHECK-NEXT: add x8, sp, #288 +; CHECK-NEXT: add x8, sp, #304 ; CHECK-NEXT: ld1 { v1.b }[8], [x13] -; CHECK-NEXT: add x13, sp, #8 +; CHECK-NEXT: add x13, sp, #24 ; CHECK-NEXT: ld1 { v3.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #864 +; CHECK-NEXT: add x11, sp, #880 ; CHECK-NEXT: ld1 { v2.b }[4], [x12] -; CHECK-NEXT: add x12, sp, #736 +; CHECK-NEXT: add x12, sp, #752 ; CHECK-NEXT: ld1 { v0.b }[9], [x10] -; CHECK-NEXT: add x10, sp, #296 +; CHECK-NEXT: add x10, sp, #312 ; CHECK-NEXT: ld1 { v1.b }[9], [x13] -; CHECK-NEXT: add x13, sp, #16 +; CHECK-NEXT: add x13, sp, #32 ; CHECK-NEXT: ld1 { v5.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #1000 +; CHECK-NEXT: add x11, sp, #1016 ; CHECK-NEXT: ld1 { v4.b }[4], [x12] -; CHECK-NEXT: add x12, sp, #104 +; CHECK-NEXT: add x12, sp, #120 ; CHECK-NEXT: ld1 { v0.b }[10], [x9] -; CHECK-NEXT: add x9, sp, #304 +; CHECK-NEXT: add x9, sp, #320 ; CHECK-NEXT: ld1 { v1.b }[10], [x13] -; CHECK-NEXT: add x13, sp, #24 +; CHECK-NEXT: add x13, sp, #40 ; CHECK-NEXT: ld1 { v3.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #872 +; CHECK-NEXT: add x11, sp, #888 ; CHECK-NEXT: ld1 { v2.b }[5], [x12] -; CHECK-NEXT: add x12, sp, #744 +; CHECK-NEXT: add x12, sp, #760 ; CHECK-NEXT: ld1 { v0.b }[11], [x14] -; CHECK-NEXT: add x14, sp, #312 +; CHECK-NEXT: add x14, sp, #328 ; CHECK-NEXT: ld1 { v1.b }[11], [x13] -; CHECK-NEXT: add x13, sp, #32 +; CHECK-NEXT: add x13, sp, #48 ; CHECK-NEXT: ld1 { v4.b }[5], [x12] -; CHECK-NEXT: add x12, sp, #40 +; CHECK-NEXT: add x12, sp, #56 ; CHECK-NEXT: ld1 { v5.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #112 +; CHECK-NEXT: add x11, sp, #128 ; CHECK-NEXT: ld1 { v0.b }[12], [x8] -; CHECK-NEXT: add x8, sp, #1008 +; CHECK-NEXT: add x8, sp, #1024 ; CHECK-NEXT: ld1 { v1.b }[12], [x13] ; CHECK-NEXT: ld1 { v2.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #48 +; CHECK-NEXT: add x11, sp, #64 ; CHECK-NEXT: ld1 { v3.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #1016 +; CHECK-NEXT: add x8, sp, #1032 ; CHECK-NEXT: ld1 { v0.b }[13], [x10] -; CHECK-NEXT: add x10, sp, #752 +; CHECK-NEXT: add x10, sp, #768 ; CHECK-NEXT: ld1 { v1.b }[13], [x12] ; CHECK-NEXT: movi v7.2d, #0000000000000000 ; CHECK-NEXT: ld1 { v3.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #1024 +; CHECK-NEXT: add x8, sp, #1040 ; CHECK-NEXT: ld1 { v4.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #880 +; CHECK-NEXT: add x10, sp, #896 ; CHECK-NEXT: ld1 { v0.b }[14], [x9] -; CHECK-NEXT: add x9, sp, #760 +; CHECK-NEXT: add x9, sp, #776 ; CHECK-NEXT: ld1 { v1.b }[14], [x11] -; CHECK-NEXT: add x11, sp, #120 +; CHECK-NEXT: add x11, sp, #136 ; CHECK-NEXT: ld1 { v3.b }[8], [x8] -; CHECK-NEXT: add x8, sp, #1032 +; CHECK-NEXT: add x8, sp, #1048 ; CHECK-NEXT: ld1 { v4.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #768 +; CHECK-NEXT: add x9, sp, #784 ; CHECK-NEXT: ld1 { v5.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #888 +; CHECK-NEXT: add x10, sp, #904 ; CHECK-NEXT: ld1 { v2.b }[7], [x11] -; CHECK-NEXT: add x11, sp, #128 +; CHECK-NEXT: add x11, sp, #144 ; CHECK-NEXT: ld1 { v3.b }[9], [x8] -; CHECK-NEXT: add x8, sp, #1040 +; CHECK-NEXT: add x8, sp, #1056 ; CHECK-NEXT: ld1 { v4.b }[8], [x9] -; CHECK-NEXT: add x9, sp, #776 +; CHECK-NEXT: add x9, sp, #792 ; CHECK-NEXT: ld1 { v5.b }[7], [x10] -; CHECK-NEXT: add x10, sp, #896 +; CHECK-NEXT: add x10, sp, #912 ; CHECK-NEXT: ld1 { v2.b }[8], [x11] -; CHECK-NEXT: add x11, sp, #136 +; CHECK-NEXT: add x11, sp, #152 ; CHECK-NEXT: ld1 { v3.b }[10], [x8] -; CHECK-NEXT: add x8, sp, #1048 +; CHECK-NEXT: add x8, sp, #1064 ; CHECK-NEXT: ld1 { v4.b }[9], [x9] -; CHECK-NEXT: add x9, sp, #784 +; CHECK-NEXT: add x9, sp, #800 ; CHECK-NEXT: ld1 { v5.b }[8], [x10] -; CHECK-NEXT: add x10, sp, #904 +; CHECK-NEXT: add x10, sp, #920 ; CHECK-NEXT: ld1 { v2.b }[9], [x11] -; CHECK-NEXT: add x11, sp, #144 +; CHECK-NEXT: add x11, sp, #160 ; CHECK-NEXT: ld1 { v3.b }[11], [x8] -; CHECK-NEXT: add x8, sp, #1056 +; CHECK-NEXT: add x8, sp, #1072 ; CHECK-NEXT: ld1 { v4.b }[10], [x9] -; CHECK-NEXT: add x9, sp, #792 +; CHECK-NEXT: add x9, sp, #808 ; CHECK-NEXT: ld1 { v5.b }[9], [x10] -; CHECK-NEXT: add x10, sp, #912 +; CHECK-NEXT: add x10, sp, #928 ; CHECK-NEXT: ld1 { v2.b }[10], [x11] -; CHECK-NEXT: add x11, sp, #152 +; CHECK-NEXT: add x11, sp, #168 ; CHECK-NEXT: ld1 { v3.b }[12], [x8] -; CHECK-NEXT: add x8, sp, #1064 +; CHECK-NEXT: add x8, sp, #1080 ; CHECK-NEXT: ld1 { v4.b }[11], [x9] -; CHECK-NEXT: add x9, sp, #800 +; CHECK-NEXT: add x9, sp, #816 ; CHECK-NEXT: ld1 { v5.b }[10], [x10] -; CHECK-NEXT: add x10, sp, #920 +; CHECK-NEXT: add x10, sp, #936 ; CHECK-NEXT: ld1 { v2.b }[11], [x11] -; CHECK-NEXT: add x11, sp, #160 +; CHECK-NEXT: add x11, sp, #176 ; CHECK-NEXT: ld1 { v3.b }[13], [x8] -; CHECK-NEXT: add x8, sp, #1072 +; CHECK-NEXT: add x8, sp, #1088 ; CHECK-NEXT: ld1 { v4.b }[12], [x9] -; CHECK-NEXT: add x9, sp, #808 +; CHECK-NEXT: add x9, sp, #824 ; CHECK-NEXT: ld1 { v5.b }[11], [x10] -; CHECK-NEXT: add x10, sp, #928 +; CHECK-NEXT: add x10, sp, #944 ; CHECK-NEXT: ld1 { v2.b }[12], [x11] -; CHECK-NEXT: add x11, sp, #168 +; CHECK-NEXT: add x11, sp, #184 ; CHECK-NEXT: ld1 { v3.b }[14], [x8] -; CHECK-NEXT: add x8, sp, #1080 +; CHECK-NEXT: add x8, sp, #1096 ; CHECK-NEXT: ld1 { v4.b }[13], [x9] -; CHECK-NEXT: add x9, sp, #816 +; CHECK-NEXT: add x9, sp, #832 ; CHECK-NEXT: ld1 { v5.b }[12], [x10] -; CHECK-NEXT: add x10, sp, #936 +; CHECK-NEXT: add x10, sp, #952 ; CHECK-NEXT: movi v16.2d, #0000000000000000 ; CHECK-NEXT: ld1 { v0.b }[15], [x14] ; CHECK-NEXT: ld1 { v2.b }[13], [x11] -; CHECK-NEXT: add x11, sp, #56 +; CHECK-NEXT: add x11, sp, #72 ; CHECK-NEXT: ld1 { v3.b }[15], [x8] -; CHECK-NEXT: add x8, sp, #176 +; CHECK-NEXT: add x8, sp, #192 ; CHECK-NEXT: ld1 { v4.b }[14], [x9] -; CHECK-NEXT: add x9, sp, #824 +; CHECK-NEXT: add x9, sp, #840 ; CHECK-NEXT: ld1 { v5.b }[13], [x10] -; CHECK-NEXT: add x10, sp, #944 +; CHECK-NEXT: add x10, sp, #960 ; CHECK-NEXT: ld1 { v1.b }[15], [x11] ; CHECK-NEXT: sdot v16.4s, v0.16b, v6.16b ; CHECK-NEXT: ld1 { v2.b }[14], [x8] ; CHECK-NEXT: sdot v7.4s, v3.16b, v6.16b ; CHECK-NEXT: ld1 { v4.b }[15], [x9] ; CHECK-NEXT: ld1 { v5.b }[14], [x10] -; CHECK-NEXT: add x8, sp, #184 -; CHECK-NEXT: add x9, sp, #952 +; CHECK-NEXT: add x8, sp, #200 +; CHECK-NEXT: add x9, sp, #968 ; CHECK-NEXT: sdot v16.4s, v1.16b, v6.16b ; CHECK-NEXT: ld1 { v2.b }[15], [x8] ; CHECK-NEXT: sdot v7.4s, v4.16b, v6.16b @@ -2839,6 +2870,7 @@ define i32 @test_sdot_v48i8_double_nomla(<48 x i8> %a, <48 x i8> %b, <48 x i8> % ; CHECK-NEXT: add v0.4s, v16.4s, v7.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %az = sext <48 x i8> %a to <48 x i32> diff --git a/llvm/test/CodeGen/AArch64/scavenge-large-call.ll b/llvm/test/CodeGen/AArch64/scavenge-large-call.ll new file mode 100644 index 0000000..0c9bdd0 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/scavenge-large-call.ll @@ -0,0 +1,11 @@ +; RUN: llc -mtriple=arm64_32-apple-watchos %s -o - | FileCheck %s + +; CHECK-LABEL: caller: +; CHECK: add {{x[0-9]+}}, sp, + +define void @caller(ptr %0, i16 %1, i16 %2, i8 %3, double %4, i16 %5, i8 %6, ptr %7, double %8, i32 %9, ptr %10, double %11, double %12, [2 x i64] %13, [2 x i64] %14, [2 x i64] %15, double %16, double %17, [2 x i64] %18, [2 x i64] %19, i16 %20, i32 %21, double %22, i8 %23, [2 x i64] %24, [2 x i64] %25, [2 x i64] %26, i8 %27, i16 %28, i16 %29, i16 %30, i32 %31, [2 x i64] %32, [2 x i64] %33, [2 x i64] %34, [2 x i64] %35, [2 x i64] %36, i32 %37, i32 %38) { + tail call void @callee(ptr %0, i16 %1, i16 %2, i8 %3, double 0.000000e+00, i16 %5, i8 %6, ptr %7, double 0.000000e+00, i32 %9, ptr %10, double 0.000000e+00, double 0.000000e+00, [2 x i64] %13, [2 x i64] %14, [2 x i64] %15, double 0.000000e+00, double 0.000000e+00, [2 x i64] %18, [2 x i64] %19, i16 %20, i32 %21, double 0.000000e+00, i8 %23, [2 x i64] %24, [2 x i64] %25, [2 x i64] zeroinitializer, i8 %27, i16 0, i16 0, i16 %28, i32 0, [2 x i64] zeroinitializer, [2 x i64] zeroinitializer, [2 x i64] zeroinitializer, [2 x i64] %35, [2 x i64] %36, i32 0, i32 0) + ret void +} + +declare void @callee(ptr, i16, i16, i8, double, i16, i8, ptr, double, i32, ptr, double, double, [2 x i64], [2 x i64], [2 x i64], double, double, [2 x i64], [2 x i64], i16, i32, double, i8, [2 x i64], [2 x i64], [2 x i64], i8, i16, i16, i16, i32, [2 x i64], [2 x i64], [2 x i64], [2 x i64], [2 x i64], i32, i32) diff --git a/llvm/test/CodeGen/AArch64/stackmap.ll b/llvm/test/CodeGen/AArch64/stackmap.ll index ab238f7..199206b 100644 --- a/llvm/test/CodeGen/AArch64/stackmap.ll +++ b/llvm/test/CodeGen/AArch64/stackmap.ll @@ -38,7 +38,7 @@ ; CHECK-NEXT: .xword 16 ; CHECK-NEXT: .xword 1 ; CHECK-NEXT: .xword spilledValue -; CHECK-NEXT: .xword 144 +; CHECK-NEXT: .xword 160 ; CHECK-NEXT: .xword 1 ; CHECK-NEXT: .xword spilledStackMapValue ; CHECK-NEXT: .xword 128 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll index 4dc3c5e..5d5c59a 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll @@ -11,7 +11,7 @@ target triple = "aarch64-unknown-linux-gnu" define dso_local void @func1(ptr %v1, ptr %v2, ptr %v3, ptr %v4, ptr %v5, ptr %v6, ptr %v7, ptr %v8, ; CHECK-LABEL: func1: ; CHECK: // %bb.0: -; CHECK-NEXT: str x25, [sp, #-64]! // 8-byte Folded Spill +; CHECK-NEXT: stp x29, x25, [sp, #-64]! // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill @@ -22,7 +22,8 @@ define dso_local void @func1(ptr %v1, ptr %v2, ptr %v3, ptr %v4, ptr %v5, ptr %v ; CHECK-NEXT: .cfi_offset w22, -32 ; CHECK-NEXT: .cfi_offset w23, -40 ; CHECK-NEXT: .cfi_offset w24, -48 -; CHECK-NEXT: .cfi_offset w25, -64 +; CHECK-NEXT: .cfi_offset w25, -56 +; CHECK-NEXT: .cfi_offset w29, -64 ; CHECK-NEXT: add x8, sp, #64 ; CHECK-NEXT: add x9, sp, #128 ; CHECK-NEXT: add x10, sp, #160 @@ -61,7 +62,7 @@ define dso_local void @func1(ptr %v1, ptr %v2, ptr %v3, ptr %v4, ptr %v5, ptr %v ; CHECK-NEXT: stp x12, x11, [sp, #320] ; CHECK-NEXT: stp x10, x9, [sp, #336] ; CHECK-NEXT: str x8, [sp, #352] -; CHECK-NEXT: ldr x25, [sp], #64 // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x25, [sp], #64 // 16-byte Folded Reload ; CHECK-NEXT: b func2 ptr %v9, ptr %v10, ptr %v11, ptr %v12, ptr %v13, ptr %v14, ptr %v15, ptr %v16, ptr %v17, ptr %v18, ptr %v19, ptr %v20, ptr %v21, ptr %v22, ptr %v23, ptr %v24, -- 2.7.4