[RISCV] Ensure the entire stack is aligned to the RVV stack alignment

author Fraser Cormack <fraser@codeplay.com>

Thu, 19 May 2022 13:47:40 +0000 (14:47 +0100)

committer Fraser Cormack <fraser@codeplay.com>

Tue, 24 May 2022 05:58:51 +0000 (06:58 +0100)
author Fraser Cormack <fraser@codeplay.com>
Thu, 19 May 2022 13:47:40 +0000 (14:47 +0100)
committer Fraser Cormack <fraser@codeplay.com>
Tue, 24 May 2022 05:58:51 +0000 (06:58 +0100)
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

index 5b789a9..f774433 100644 (file)
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -962,6 +962,11 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
    RVFI->setRVVStackSize(RVVStackSize);
    RVFI->setRVVStackAlign(RVVStackAlign);
  
+  // Ensure the entire stack is aligned to at least the RVV requirement: some
+  // scalable-vector object alignments are not considered by the
+  // target-independent code.
+  MFI.ensureMaxAlignment(RVVStackAlign);
+
    const RISCVInstrInfo &TII = *MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
  
    // estimateStackSize has been observed to under-estimate the final stack
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll

index 458e5b9..c0cdad9 100644 (file)
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
@@ -290,9 +290,12 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
  ; RV32-NEXT:    .cfi_def_cfa_offset 144
  ; RV32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
  ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    addi s0, sp, 144
+; RV32-NEXT:    .cfi_def_cfa s0, 0
  ; RV32-NEXT:    csrr a1, vlenb
  ; RV32-NEXT:    slli a1, a1, 4
  ; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    andi sp, sp, -128
  ; RV32-NEXT:    csrr a1, vlenb
  ; RV32-NEXT:    slli a1, a1, 3
  ; RV32-NEXT:    add a3, a0, a1
@@ -308,9 +311,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
  ; RV32-NEXT:    vmv8r.v v8, v0
  ; RV32-NEXT:    vmv8r.v v16, v24
  ; RV32-NEXT:    call ext2@plt
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, s0, -144
  ; RV32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
  ; RV32-NEXT:    addi sp, sp, 144
  ; RV32-NEXT:    ret
@@ -321,9 +322,12 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
  ; RV64-NEXT:    .cfi_def_cfa_offset 144
  ; RV64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
  ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    addi s0, sp, 144
+; RV64-NEXT:    .cfi_def_cfa s0, 0
  ; RV64-NEXT:    csrr a1, vlenb
  ; RV64-NEXT:    slli a1, a1, 4
  ; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    andi sp, sp, -128
  ; RV64-NEXT:    csrr a1, vlenb
  ; RV64-NEXT:    slli a1, a1, 3
  ; RV64-NEXT:    add a3, a0, a1
@@ -339,9 +343,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
  ; RV64-NEXT:    vmv8r.v v8, v0
  ; RV64-NEXT:    vmv8r.v v16, v24
  ; RV64-NEXT:    call ext2@plt
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, s0, -144
  ; RV64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
  ; RV64-NEXT:    addi sp, sp, 144
  ; RV64-NEXT:    ret
@@ -356,10 +358,13 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
  ; RV32-NEXT:    .cfi_def_cfa_offset 144
  ; RV32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
  ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    addi s0, sp, 144
+; RV32-NEXT:    .cfi_def_cfa s0, 0
  ; RV32-NEXT:    csrr a1, vlenb
  ; RV32-NEXT:    li a3, 48
  ; RV32-NEXT:    mul a1, a1, a3
  ; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    andi sp, sp, -128
  ; RV32-NEXT:    csrr a1, vlenb
  ; RV32-NEXT:    slli a1, a1, 3
  ; RV32-NEXT:    add a3, a2, a1
@@ -414,10 +419,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
  ; RV32-NEXT:    addi a1, a1, 128
  ; RV32-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
  ; RV32-NEXT:    call ext3@plt
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 48
-; RV32-NEXT:    mul a0, a0, a1
-; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, s0, -144
  ; RV32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
  ; RV32-NEXT:    addi sp, sp, 144
  ; RV32-NEXT:    ret
@@ -428,10 +430,13 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
  ; RV64-NEXT:    .cfi_def_cfa_offset 144
  ; RV64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
  ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    addi s0, sp, 144
+; RV64-NEXT:    .cfi_def_cfa s0, 0
  ; RV64-NEXT:    csrr a1, vlenb
  ; RV64-NEXT:    li a3, 48
  ; RV64-NEXT:    mul a1, a1, a3
  ; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    andi sp, sp, -128
  ; RV64-NEXT:    csrr a1, vlenb
  ; RV64-NEXT:    slli a1, a1, 3
  ; RV64-NEXT:    add a3, a2, a1
@@ -486,10 +491,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
  ; RV64-NEXT:    addi a1, a1, 128
  ; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
  ; RV64-NEXT:    call ext3@plt
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    li a1, 48
-; RV64-NEXT:    mul a0, a0, a1
-; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, s0, -144
  ; RV64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
  ; RV64-NEXT:    addi sp, sp, 144
  ; RV64-NEXT:    ret
@@ -524,9 +526,12 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
  ; RV32-NEXT:    .cfi_def_cfa_offset 144
  ; RV32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
  ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    addi s0, sp, 144
+; RV32-NEXT:    .cfi_def_cfa s0, 0
  ; RV32-NEXT:    csrr a0, vlenb
  ; RV32-NEXT:    slli a0, a0, 5
  ; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    andi sp, sp, -128
  ; RV32-NEXT:    csrr a0, vlenb
  ; RV32-NEXT:    slli a0, a0, 3
  ; RV32-NEXT:    addi a1, sp, 128
@@ -563,9 +568,7 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
  ; RV32-NEXT:    li a0, 0
  ; RV32-NEXT:    vmv.v.i v16, 0
  ; RV32-NEXT:    call vector_arg_indirect_stack@plt
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, s0, -144
  ; RV32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
  ; RV32-NEXT:    addi sp, sp, 144
  ; RV32-NEXT:    ret
@@ -576,9 +579,12 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
  ; RV64-NEXT:    .cfi_def_cfa_offset 144
  ; RV64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
  ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    addi s0, sp, 144
+; RV64-NEXT:    .cfi_def_cfa s0, 0
  ; RV64-NEXT:    csrr a0, vlenb
  ; RV64-NEXT:    slli a0, a0, 5
  ; RV64-NEXT:    sub sp, sp, a0
+; RV64-NEXT:    andi sp, sp, -128
  ; RV64-NEXT:    csrr a0, vlenb
  ; RV64-NEXT:    slli a0, a0, 3
  ; RV64-NEXT:    addi a1, sp, 128
@@ -615,9 +621,7 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
  ; RV64-NEXT:    li a0, 0
  ; RV64-NEXT:    vmv.v.i v16, 0
  ; RV64-NEXT:    call vector_arg_indirect_stack@plt
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 5
-; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, s0, -144
  ; RV64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
  ; RV64-NEXT:    addi sp, sp, 144
  ; RV64-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll

index 2875e06..d4dd603 100644 (file)
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
@@ -27,9 +27,12 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
  ; RV32-NEXT:    .cfi_def_cfa_offset 144
  ; RV32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
  ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    addi s0, sp, 144
+; RV32-NEXT:    .cfi_def_cfa s0, 0
  ; RV32-NEXT:    csrr a0, vlenb
  ; RV32-NEXT:    slli a0, a0, 4
  ; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    andi sp, sp, -128
  ; RV32-NEXT:    csrr a0, vlenb
  ; RV32-NEXT:    slli a0, a0, 3
  ; RV32-NEXT:    addi a1, sp, 128
@@ -42,9 +45,7 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
  ; RV32-NEXT:    addi a0, sp, 128
  ; RV32-NEXT:    vmv.v.i v16, 0
  ; RV32-NEXT:    call callee_scalable_vector_split_indirect@plt
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, s0, -144
  ; RV32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
  ; RV32-NEXT:    addi sp, sp, 144
  ; RV32-NEXT:    ret
@@ -55,9 +56,12 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
  ; RV64-NEXT:    .cfi_def_cfa_offset 144
  ; RV64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
  ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    addi s0, sp, 144
+; RV64-NEXT:    .cfi_def_cfa s0, 0
  ; RV64-NEXT:    csrr a0, vlenb
  ; RV64-NEXT:    slli a0, a0, 4
  ; RV64-NEXT:    sub sp, sp, a0
+; RV64-NEXT:    andi sp, sp, -128
  ; RV64-NEXT:    csrr a0, vlenb
  ; RV64-NEXT:    slli a0, a0, 3
  ; RV64-NEXT:    addi a1, sp, 128
@@ -70,9 +74,7 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
  ; RV64-NEXT:    addi a0, sp, 128
  ; RV64-NEXT:    vmv.v.i v16, 0
  ; RV64-NEXT:    call callee_scalable_vector_split_indirect@plt
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, s0, -144
  ; RV64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
  ; RV64-NEXT:    addi sp, sp, 144
  ; RV64-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-emergency-slot.mir

index 4f0abf2..41986a8 100644 (file)
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-emergency-slot.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-emergency-slot.mir
@@ -14,10 +14,10 @@
    ; CHECK-NEXT:    sd a0, 32(sp)
    ; CHECK-NEXT:    sd a0, 16(sp)
    ; CHECK-NEXT:    vsetivli a5, 1, e16, m1, ta, mu
-  ; CHECK-NEXT:    sd a1, 8(sp)
+  ; CHECK-NEXT:    sd a1, 0(sp)
    ; CHECK-NEXT:    addi a1, sp, 24
    ; CHECK-NEXT:    vs1r.v v25, (a1) # Unknown-size Folded Spill
-  ; CHECK-NEXT:    ld a1, 8(sp)
+  ; CHECK-NEXT:    ld a1, 0(sp)
    ; CHECK-NEXT:    call fixedlen_vector_spillslot@plt
    ; CHECK-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
    ; CHECK-NEXT:    addi sp, sp, 48
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll

index 90761f2..851ce28 100644 (file)
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -561,9 +561,12 @@ define void @insert_v2i64_nxv16i64_hi(<2 x i64>* %psv, <vscale x 16 x i64>* %out
  ; CHECK:       # %bb.0:
  ; CHECK-NEXT:    addi sp, sp, -64
  ; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    addi s0, sp, 64
+; CHECK-NEXT:    .cfi_def_cfa s0, 0
  ; CHECK-NEXT:    csrr a2, vlenb
  ; CHECK-NEXT:    slli a2, a2, 4
  ; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    andi sp, sp, -64
  ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
  ; CHECK-NEXT:    vle64.v v8, (a0)
  ; CHECK-NEXT:    addi a0, sp, 128
@@ -578,9 +581,7 @@ define void @insert_v2i64_nxv16i64_hi(<2 x i64>* %psv, <vscale x 16 x i64>* %out
  ; CHECK-NEXT:    add a0, a1, a0
  ; CHECK-NEXT:    vs8r.v v8, (a0)
  ; CHECK-NEXT:    vs8r.v v16, (a1)
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, s0, -64
  ; CHECK-NEXT:    addi sp, sp, 64
  ; CHECK-NEXT:    ret
    %sv = load <2 x i64>, <2 x i64>* %psv
diff --git a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll

index 3c2fda4..bedcfd6 100644 (file)
--- a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll
@@ -18,7 +18,7 @@ define signext i32 @foo(i32 signext %aa) #0 {
  ; CHECK-NEXT:    csrr a1, vlenb
  ; CHECK-NEXT:    slli a1, a1, 1
  ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    andi sp, sp, -8
+; CHECK-NEXT:    andi sp, sp, -16
  ; CHECK-NEXT:    mv s1, sp
  ; CHECK-NEXT:    lw t0, 44(s1)
  ; CHECK-NEXT:    lw a2, 40(s1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll

index d7be0ea..1c51758 100644 (file)
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll
@@ -26,28 +26,30 @@ define <vscale x 16 x i32> @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5,
  ; CHECK-NEXT:    .cfi_def_cfa_offset 80
  ; CHECK-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
  ; CHECK-NEXT:    .cfi_offset ra, -8
+; CHECK-NEXT:    addi s0, sp, 80
+; CHECK-NEXT:    .cfi_def_cfa s0, 0
  ; CHECK-NEXT:    csrr t0, vlenb
  ; CHECK-NEXT:    slli t0, t0, 4
  ; CHECK-NEXT:    sub sp, sp, t0
-; CHECK-NEXT:    addi t0, sp, 64
+; CHECK-NEXT:    andi sp, sp, -64
+; CHECK-NEXT:    mv s1, sp
+; CHECK-NEXT:    addi t0, s1, 64
  ; CHECK-NEXT:    sd t0, 8(sp)
  ; CHECK-NEXT:    csrr t0, vlenb
  ; CHECK-NEXT:    slli t0, t0, 3
-; CHECK-NEXT:    add t0, sp, t0
+; CHECK-NEXT:    add t0, s1, t0
  ; CHECK-NEXT:    addi t0, t0, 64
  ; CHECK-NEXT:    sd t0, 0(sp)
-; CHECK-NEXT:    addi t0, sp, 64
+; CHECK-NEXT:    addi t0, s1, 64
  ; CHECK-NEXT:    vs8r.v v8, (t0)
  ; CHECK-NEXT:    csrr t0, vlenb
  ; CHECK-NEXT:    slli t0, t0, 3
-; CHECK-NEXT:    add t0, sp, t0
+; CHECK-NEXT:    add t0, s1, t0
  ; CHECK-NEXT:    addi t0, t0, 64
  ; CHECK-NEXT:    vs8r.v v8, (t0)
  ; CHECK-NEXT:    vmv8r.v v16, v8
  ; CHECK-NEXT:    call bar@plt
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, s0, -80
  ; CHECK-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
  ; CHECK-NEXT:    addi sp, sp, 80
  ; CHECK-NEXT:    ret
author	Fraser Cormack <fraser@codeplay.com>
	Thu, 19 May 2022 13:47:40 +0000 (14:47 +0100)
committer	Fraser Cormack <fraser@codeplay.com>
	Tue, 24 May 2022 05:58:51 +0000 (06:58 +0100)
llvm/lib/Target/RISCV/RISCVFrameLowering.cpp		patch | blob | history
llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll		patch | blob | history
llvm/test/CodeGen/RISCV/rvv/calling-conv.ll		patch | blob | history
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-emergency-slot.mir		patch | blob | history
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll		patch | blob | history
llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll		patch | blob | history
llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll		patch | blob | history