Add bti intructions to aarch64 assembly to work with strict (#808)
authorTobias Heider <tobias.heider@stusta.de>
Thu, 23 Nov 2023 14:15:29 +0000 (15:15 +0100)
committerGitHub <noreply@github.com>
Thu, 23 Nov 2023 14:15:29 +0000 (09:15 -0500)
BTI enforcement on OpenBSD.

src/aarch64/ffi.c
src/aarch64/sysv.S

index 6544ac0fe3e1c5410abb7d063ffc226ed91a9568..67b18fbfb71d333faef0c2f4843e747445b5e106 100644 (file)
@@ -390,47 +390,59 @@ extend_hfa_type (void *dest, void *src, int h)
        "adr    %0, 0f\n"
 "      add     %0, %0, %1\n"
 "      br      %0\n"
-"0:    ldp     s16, s17, [%3]\n"       /* S4 */
+"0:    bti     j\n"                    /* S4 */
+"      ldp     s16, s17, [%3]\n"
 "      ldp     s18, s19, [%3, #8]\n"
 "      b       4f\n"
-"      ldp     s16, s17, [%3]\n"       /* S3 */
+"      bti     j\n"                    /* S3 */
+"      ldp     s16, s17, [%3]\n"
 "      ldr     s18, [%3, #8]\n"
 "      b       3f\n"
-"      ldp     s16, s17, [%3]\n"       /* S2 */
+"      bti     j\n"                    /* S2 */
+"      ldp     s16, s17, [%3]\n"
 "      b       2f\n"
 "      nop\n"
-"      ldr     s16, [%3]\n"            /* S1 */
+"      bti     j\n"                    /* S1 */
+"      ldr     s16, [%3]\n"
 "      b       1f\n"
 "      nop\n"
-"      ldp     d16, d17, [%3]\n"       /* D4 */
+"      bti     j\n"                    /* D4 */
+"      ldp     d16, d17, [%3]\n"
 "      ldp     d18, d19, [%3, #16]\n"
 "      b       4f\n"
-"      ldp     d16, d17, [%3]\n"       /* D3 */
+"      bti     j\n"                    /* D3 */
+"      ldp     d16, d17, [%3]\n"
 "      ldr     d18, [%3, #16]\n"
 "      b       3f\n"
-"      ldp     d16, d17, [%3]\n"       /* D2 */
+"      bti     j\n"                    /* D2 */
+"      ldp     d16, d17, [%3]\n"
 "      b       2f\n"
 "      nop\n"
-"      ldr     d16, [%3]\n"            /* D1 */
+"      bti     j\n"                    /* D1 */
+"      ldr     d16, [%3]\n"
 "      b       1f\n"
 "      nop\n"
-"      ldp     q16, q17, [%3]\n"       /* Q4 */
+"      bti     j\n"                    /* Q4 */
+"      ldp     q16, q17, [%3]\n"
 "      ldp     q18, q19, [%3, #32]\n"
 "      b       4f\n"
-"      ldp     q16, q17, [%3]\n"       /* Q3 */
+"      bti     j\n"                    /* Q3 */
+"      ldp     q16, q17, [%3]\n"
 "      ldr     q18, [%3, #32]\n"
 "      b       3f\n"
-"      ldp     q16, q17, [%3]\n"       /* Q2 */
+"      bti     j\n"                    /* Q2 */
+"      ldp     q16, q17, [%3]\n"
 "      b       2f\n"
 "      nop\n"
-"      ldr     q16, [%3]\n"            /* Q1 */
+"      bti     j\n"                    /* Q1 */
+"      ldr     q16, [%3]\n"
 "      b       1f\n"
 "4:    str     q19, [%2, #48]\n"
 "3:    str     q18, [%2, #32]\n"
 "2:    str     q17, [%2, #16]\n"
 "1:    str     q16, [%2]"
     : "=&r"(x0)
-    : "r"(f * 12), "r"(dest), "r"(src)
+    : "r"(f * 16), "r"(dest), "r"(src)
     : "memory", "v16", "v17", "v18", "v19");
 }
 #endif
index 286f1f13764f60ab8f96e627ac2db599e445b8d1..9f4188e00faca0b4b62894e379c03d09e8ac0aa3 100644 (file)
@@ -84,6 +84,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 
        cfi_startproc
 CNAME(ffi_call_SYSV):
+       bti     c
        /* Sign the lr with x1 since that is where it will be stored */
        SIGN_LR_WITH_REG(x1)
 
@@ -144,78 +145,142 @@ CNAME(ffi_call_SYSV):
        /* Save the return value as directed.  */
        adr     x5, 0f
        and     w4, w4, #AARCH64_RET_MASK
-       add     x5, x5, x4, lsl #3
+       add     x5, x5, x4, lsl #4
        br      x5
 
-       /* Note that each table entry is 2 insns, and thus 8 bytes.
+       /* Note that each table entry is 4 insns, and thus 16 bytes.
           For integer data, note that we're storing into ffi_arg
           and therefore we want to extend to 64 bits; these types
           have two consecutive entries allocated for them.  */
        .align  4
-0:     b 99f                           /* VOID */
+0:     bti     j                       /* VOID */
+       b 99f
+       nop 
        nop
-1:     str     x0, [x3]                /* INT64 */
+1:     bti     j                       /* INT64 */
+       str     x0, [x3]
        b 99f
-2:     stp     x0, x1, [x3]            /* INT128 */
+       nop
+2:     bti     j                       /* INT128 */
+       stp     x0, x1, [x3]
        b 99f
+       nop
 3:     brk     #1000                   /* UNUSED */
        b 99f
+       nop
+       nop
 4:     brk     #1000                   /* UNUSED */
        b 99f
+       nop
+       nop
 5:     brk     #1000                   /* UNUSED */
        b 99f
+       nop
+       nop
 6:     brk     #1000                   /* UNUSED */
        b 99f
+       nop
+       nop
 7:     brk     #1000                   /* UNUSED */
        b 99f
-8:     st4     { v0.s, v1.s, v2.s, v3.s }[0], [x3]     /* S4 */
+       nop
+       nop
+8:     bti     j                       /* S4 */
+       st4     { v0.s, v1.s, v2.s, v3.s }[0], [x3]
        b 99f
-9:     st3     { v0.s, v1.s, v2.s }[0], [x3]   /* S3 */
+       nop
+9:     bti     j                       /* S3 */
+       st3     { v0.s, v1.s, v2.s }[0], [x3]
        b 99f
-10:    stp     s0, s1, [x3]            /* S2 */
+       nop
+10:    bti     j                       /* S2 */
+       stp     s0, s1, [x3]
        b 99f
-11:    str     s0, [x3]                /* S1 */
+       nop
+11:    bti     j
+       str     s0, [x3]                /* S1 */
        b 99f
-12:    st4     { v0.d, v1.d, v2.d, v3.d }[0], [x3]     /* D4 */
+       nop
+12:    bti     j                       /* D4 */
+       st4     { v0.d, v1.d, v2.d, v3.d }[0], [x3]
        b 99f
-13:    st3     { v0.d, v1.d, v2.d }[0], [x3]   /* D3 */
+       nop
+13:    bti     j                       /* D3 */
+       st3     { v0.d, v1.d, v2.d }[0], [x3]
        b 99f
-14:    stp     d0, d1, [x3]            /* D2 */
+       nop
+14:    bti     j                       /* D2 */
+       stp     d0, d1, [x3]
        b 99f
-15:    str     d0, [x3]                /* D1 */
+       nop
+15:    bti     j                       /* D1 */
+       str     d0, [x3]
        b 99f
-16:    str     q3, [x3, #48]           /* Q4 */
        nop
-17:    str     q2, [x3, #32]           /* Q3 */
+16:    bti     j                       /* Q4 */
+       str     q3, [x3, #48]
+       nop
        nop
-18:    stp     q0, q1, [x3]            /* Q2 */
+17:    bti     j                       /* Q3 */
+       str     q2, [x3, #32]
+       nop
+       nop
+18:    bti     j                       /* Q2 */
+       stp     q0, q1, [x3]
        b 99f
-19:    str     q0, [x3]                /* Q1 */
+       nop
+19:    bti     j                       /* Q1 */
+       str     q0, [x3]
        b 99f
-20:    uxtb    w0, w0                  /* UINT8 */
+       nop
+20:    bti     j                       /* UINT8 */
+       uxtb    w0, w0
        str     x0, [x3]
+       nop
 21:    b 99f                           /* reserved */
        nop
-22:    uxth    w0, w0                  /* UINT16 */
+       nop
+       nop
+22:    bti     j                       /* UINT16 */
+       uxth    w0, w0
        str     x0, [x3]
+       nop
 23:    b 99f                           /* reserved */
        nop
-24:    mov     w0, w0                  /* UINT32 */
+       nop
+       nop
+24:    bti     j                       /* UINT32 */
+       mov     w0, w0
        str     x0, [x3]
+       nop
 25:    b 99f                           /* reserved */
        nop
-26:    sxtb    x0, w0                  /* SINT8 */
+       nop
+       nop
+26:    bti     j                       /* SINT8 */
+       sxtb    x0, w0
        str     x0, [x3]
+       nop
 27:    b 99f                           /* reserved */
        nop
-28:    sxth    x0, w0                  /* SINT16 */
+       nop
+       nop
+28:    bti     j                       /* SINT16 */
+       sxth    x0, w0
        str     x0, [x3]
+       nop
 29:    b 99f                           /* reserved */
        nop
-30:    sxtw    x0, w0                  /* SINT32 */
+       nop
+       nop
+30:    bti     j                       /* SINT32 */
+       sxtw    x0, w0
        str     x0, [x3]
+       nop
 31:    b 99f                           /* reserved */
        nop
+       nop
+       nop
 
        /* Return now that result has been populated. */
 99:
@@ -252,6 +317,7 @@ CNAME(ffi_call_SYSV):
        .align 4
 CNAME(ffi_closure_SYSV_V):
        cfi_startproc
+       bti     c
        SIGN_LR
        stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
        cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
@@ -276,6 +342,7 @@ CNAME(ffi_closure_SYSV_V):
        .align  4
        cfi_startproc
 CNAME(ffi_closure_SYSV):
+       bti     c
        SIGN_LR
        stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
        cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
@@ -305,73 +372,135 @@ L(do_closure):
        /* Load the return value as directed.  */
        adr     x1, 0f
        and     w0, w0, #AARCH64_RET_MASK
-       add     x1, x1, x0, lsl #3
+       add     x1, x1, x0, lsl #4
        add     x3, sp, #16+CALL_CONTEXT_SIZE
        br      x1
 
-       /* Note that each table entry is 2 insns, and thus 8 bytes.  */
+       /* Note that each table entry is 4 insns, and thus 16 bytes.  */
        .align  4
-0:     b       99f                     /* VOID */
+0:     bti     j                       /* VOID */
+       b       99f
+       nop
        nop
-1:     ldr     x0, [x3]                /* INT64 */
+1:     bti     j                       /* INT64 */
+       ldr     x0, [x3]
        b       99f
-2:     ldp     x0, x1, [x3]            /* INT128 */
+       nop
+2:     bti     j                       /* INT128 */
+       ldp     x0, x1, [x3]
        b       99f
+       nop
 3:     brk     #1000                   /* UNUSED */
        nop
+       nop
+       nop
 4:     brk     #1000                   /* UNUSED */
        nop
+       nop
+       nop
 5:     brk     #1000                   /* UNUSED */
        nop
+       nop
+       nop
 6:     brk     #1000                   /* UNUSED */
        nop
+       nop
+       nop
 7:     brk     #1000                   /* UNUSED */
        nop
-8:     ldr     s3, [x3, #12]           /* S4 */
        nop
-9:     ldr     s2, [x3, #8]            /* S3 */
        nop
-10:    ldp     s0, s1, [x3]            /* S2 */
+8:     bti     j                       /* S4 */
+       ldr     s3, [x3, #12]
+       nop
+       nop
+9:     bti     j                       /* S3 */
+       ldr     s2, [x3, #8]
+       nop
+       nop
+10:    bti     j                       /* S2 */
+       ldp     s0, s1, [x3]
        b       99f
-11:    ldr     s0, [x3]                /* S1 */
+       nop
+11:    bti     j                       /* S1 */
+       ldr     s0, [x3]
        b       99f
-12:    ldr     d3, [x3, #24]           /* D4 */
        nop
-13:    ldr     d2, [x3, #16]           /* D3 */
+12:    bti     j                       /* D4 */
+       ldr     d3, [x3, #24]
+       nop
+       nop
+13:    bti     j                       /* D3 */
+       ldr     d2, [x3, #16]
        nop
-14:    ldp     d0, d1, [x3]            /* D2 */
+       nop
+14:    bti     j                       /* D2 */
+       ldp     d0, d1, [x3]
        b       99f
-15:    ldr     d0, [x3]                /* D1 */
+       nop
+15:    bti     j                       /* D1 */
+       ldr     d0, [x3]
        b       99f
-16:    ldr     q3, [x3, #48]           /* Q4 */
        nop
-17:    ldr     q2, [x3, #32]           /* Q3 */
+16:    bti     j                       /* Q4 */
+       ldr     q3, [x3, #48]
        nop
-18:    ldp     q0, q1, [x3]            /* Q2 */
+       nop
+17:    bti     j                       /* Q3 */
+       ldr     q2, [x3, #32]
+       nop
+       nop
+18:    bti     j                       /* Q2 */
+       ldp     q0, q1, [x3]
        b       99f
-19:    ldr     q0, [x3]                /* Q1 */
+       nop
+19:    bti     j                       /* Q1 */
+       ldr     q0, [x3]
        b       99f
-20:    ldrb    w0, [x3, #BE(7)]        /* UINT8 */
+       nop
+20:    bti     j                       /* UINT8 */
+       ldrb    w0, [x3, #BE(7)]
        b       99f
+       nop
 21:    brk     #1000                   /* reserved */
        nop
-22:    ldrh    w0, [x3, #BE(6)]        /* UINT16 */
+       nop
+       nop
+22:    bti     j                       /* UINT16 */
+       ldrh    w0, [x3, #BE(6)]
        b       99f
+       nop
 23:    brk     #1000                   /* reserved */
        nop
-24:    ldr     w0, [x3, #BE(4)]        /* UINT32 */
+       nop
+       nop
+24:    bti     j                       /* UINT32 */
+       ldr     w0, [x3, #BE(4)]
        b       99f
+       nop
 25:    brk     #1000                   /* reserved */
        nop
-26:    ldrsb   x0, [x3, #BE(7)]        /* SINT8 */
+       nop
+       nop
+26:    bti     j                       /* SINT8 */
+       ldrsb   x0, [x3, #BE(7)]
        b       99f
+       nop
 27:    brk     #1000                   /* reserved */
        nop
-28:    ldrsh   x0, [x3, #BE(6)]        /* SINT16 */
+       nop
+       nop
+28:    bti     j                       /* SINT16 */
+       ldrsh   x0, [x3, #BE(6)]
        b       99f
+       nop
 29:    brk     #1000                   /* reserved */
        nop
-30:    ldrsw   x0, [x3, #BE(4)]        /* SINT32 */
+       nop
+       nop
+30:    bti     j                       /* SINT32 */
+       ldrsw   x0, [x3, #BE(4)]
+       nop
        nop
 31:                                    /* reserved */
 99:    ldp     x29, x30, [sp], #ffi_closure_SYSV_FS
@@ -485,6 +614,7 @@ CNAME(ffi_closure_trampoline_table_page):
        .align 4
 CNAME(ffi_go_closure_SYSV_V):
        cfi_startproc
+       bti     c
        stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
        cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
        cfi_rel_offset (x29, 0)
@@ -508,6 +638,7 @@ CNAME(ffi_go_closure_SYSV_V):
        .align  4
        cfi_startproc
 CNAME(ffi_go_closure_SYSV):
+       bti     c
        stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
        cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
        cfi_rel_offset (x29, 0)