From d93c0be8a0f11f207e8232770c5f0590406efad1 Mon Sep 17 00:00:00 2001 From: Tobias Heider Date: Thu, 23 Nov 2023 15:15:29 +0100 Subject: [PATCH] Add bti instructions to aarch64 assembly to work with strict (#808) BTI enforcement on OpenBSD. --- src/aarch64/ffi.c | 38 +++++--- src/aarch64/sysv.S | 223 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 202 insertions(+), 59 deletions(-) diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c index 6544ac0..67b18fb 100644 --- a/src/aarch64/ffi.c +++ b/src/aarch64/ffi.c @@ -390,47 +390,59 @@ extend_hfa_type (void *dest, void *src, int h) "adr %0, 0f\n" " add %0, %0, %1\n" " br %0\n" -"0: ldp s16, s17, [%3]\n" /* S4 */ +"0: bti j\n" /* S4 */ +" ldp s16, s17, [%3]\n" " ldp s18, s19, [%3, #8]\n" " b 4f\n" -" ldp s16, s17, [%3]\n" /* S3 */ +" bti j\n" /* S3 */ +" ldp s16, s17, [%3]\n" " ldr s18, [%3, #8]\n" " b 3f\n" -" ldp s16, s17, [%3]\n" /* S2 */ +" bti j\n" /* S2 */ +" ldp s16, s17, [%3]\n" " b 2f\n" " nop\n" -" ldr s16, [%3]\n" /* S1 */ +" bti j\n" /* S1 */ +" ldr s16, [%3]\n" " b 1f\n" " nop\n" -" ldp d16, d17, [%3]\n" /* D4 */ +" bti j\n" /* D4 */ +" ldp d16, d17, [%3]\n" " ldp d18, d19, [%3, #16]\n" " b 4f\n" -" ldp d16, d17, [%3]\n" /* D3 */ +" bti j\n" /* D3 */ +" ldp d16, d17, [%3]\n" " ldr d18, [%3, #16]\n" " b 3f\n" -" ldp d16, d17, [%3]\n" /* D2 */ +" bti j\n" /* D2 */ +" ldp d16, d17, [%3]\n" " b 2f\n" " nop\n" -" ldr d16, [%3]\n" /* D1 */ +" bti j\n" /* D1 */ +" ldr d16, [%3]\n" " b 1f\n" " nop\n" -" ldp q16, q17, [%3]\n" /* Q4 */ +" bti j\n" /* Q4 */ +" ldp q16, q17, [%3]\n" " ldp q18, q19, [%3, #32]\n" " b 4f\n" -" ldp q16, q17, [%3]\n" /* Q3 */ +" bti j\n" /* Q3 */ +" ldp q16, q17, [%3]\n" " ldr q18, [%3, #32]\n" " b 3f\n" -" ldp q16, q17, [%3]\n" /* Q2 */ +" bti j\n" /* Q2 */ +" ldp q16, q17, [%3]\n" " b 2f\n" " nop\n" -" ldr q16, [%3]\n" /* Q1 */ +" bti j\n" /* Q1 */ +" ldr q16, [%3]\n" " b 1f\n" "4: str q19, [%2, #48]\n" "3: str q18, [%2, #32]\n" "2: str q17, [%2, #16]\n" "1: str q16, [%2]" : 
"=&r"(x0) - : "r"(f * 12), "r"(dest), "r"(src) + : "r"(f * 16), "r"(dest), "r"(src) : "memory", "v16", "v17", "v18", "v19"); } #endif diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S index 286f1f1..9f4188e 100644 --- a/src/aarch64/sysv.S +++ b/src/aarch64/sysv.S @@ -84,6 +84,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ cfi_startproc CNAME(ffi_call_SYSV): + bti c /* Sign the lr with x1 since that is where it will be stored */ SIGN_LR_WITH_REG(x1) @@ -144,78 +145,142 @@ CNAME(ffi_call_SYSV): /* Save the return value as directed. */ adr x5, 0f and w4, w4, #AARCH64_RET_MASK - add x5, x5, x4, lsl #3 + add x5, x5, x4, lsl #4 br x5 - /* Note that each table entry is 2 insns, and thus 8 bytes. + /* Note that each table entry is 4 insns, and thus 16 bytes. For integer data, note that we're storing into ffi_arg and therefore we want to extend to 64 bits; these types have two consecutive entries allocated for them. */ .align 4 -0: b 99f /* VOID */ +0: bti j /* VOID */ + b 99f + nop nop -1: str x0, [x3] /* INT64 */ +1: bti j /* INT64 */ + str x0, [x3] b 99f -2: stp x0, x1, [x3] /* INT128 */ + nop +2: bti j /* INT128 */ + stp x0, x1, [x3] b 99f + nop 3: brk #1000 /* UNUSED */ b 99f + nop + nop 4: brk #1000 /* UNUSED */ b 99f + nop + nop 5: brk #1000 /* UNUSED */ b 99f + nop + nop 6: brk #1000 /* UNUSED */ b 99f + nop + nop 7: brk #1000 /* UNUSED */ b 99f -8: st4 { v0.s, v1.s, v2.s, v3.s }[0], [x3] /* S4 */ + nop + nop +8: bti j /* S4 */ + st4 { v0.s, v1.s, v2.s, v3.s }[0], [x3] b 99f -9: st3 { v0.s, v1.s, v2.s }[0], [x3] /* S3 */ + nop +9: bti j /* S3 */ + st3 { v0.s, v1.s, v2.s }[0], [x3] b 99f -10: stp s0, s1, [x3] /* S2 */ + nop +10: bti j /* S2 */ + stp s0, s1, [x3] b 99f -11: str s0, [x3] /* S1 */ + nop +11: bti j + str s0, [x3] /* S1 */ b 99f -12: st4 { v0.d, v1.d, v2.d, v3.d }[0], [x3] /* D4 */ + nop +12: bti j /* D4 */ + st4 { v0.d, v1.d, v2.d, v3.d }[0], [x3] b 99f -13: st3 { v0.d, v1.d, v2.d }[0], [x3] /* D3 */ + nop +13: bti j /* D3 */ + st3 { 
v0.d, v1.d, v2.d }[0], [x3] b 99f -14: stp d0, d1, [x3] /* D2 */ + nop +14: bti j /* D2 */ + stp d0, d1, [x3] b 99f -15: str d0, [x3] /* D1 */ + nop +15: bti j /* D1 */ + str d0, [x3] b 99f -16: str q3, [x3, #48] /* Q4 */ nop -17: str q2, [x3, #32] /* Q3 */ +16: bti j /* Q4 */ + str q3, [x3, #48] + nop nop -18: stp q0, q1, [x3] /* Q2 */ +17: bti j /* Q3 */ + str q2, [x3, #32] + nop + nop +18: bti j /* Q2 */ + stp q0, q1, [x3] b 99f -19: str q0, [x3] /* Q1 */ + nop +19: bti j /* Q1 */ + str q0, [x3] b 99f -20: uxtb w0, w0 /* UINT8 */ + nop +20: bti j /* UINT8 */ + uxtb w0, w0 str x0, [x3] + nop 21: b 99f /* reserved */ nop -22: uxth w0, w0 /* UINT16 */ + nop + nop +22: bti j /* UINT16 */ + uxth w0, w0 str x0, [x3] + nop 23: b 99f /* reserved */ nop -24: mov w0, w0 /* UINT32 */ + nop + nop +24: bti j /* UINT32 */ + mov w0, w0 str x0, [x3] + nop 25: b 99f /* reserved */ nop -26: sxtb x0, w0 /* SINT8 */ + nop + nop +26: bti j /* SINT8 */ + sxtb x0, w0 str x0, [x3] + nop 27: b 99f /* reserved */ nop -28: sxth x0, w0 /* SINT16 */ + nop + nop +28: bti j /* SINT16 */ + sxth x0, w0 str x0, [x3] + nop 29: b 99f /* reserved */ nop -30: sxtw x0, w0 /* SINT32 */ + nop + nop +30: bti j /* SINT32 */ + sxtw x0, w0 str x0, [x3] + nop 31: b 99f /* reserved */ nop + nop + nop /* Return now that result has been populated. */ 99: @@ -252,6 +317,7 @@ CNAME(ffi_call_SYSV): .align 4 CNAME(ffi_closure_SYSV_V): cfi_startproc + bti c SIGN_LR stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) @@ -276,6 +342,7 @@ CNAME(ffi_closure_SYSV_V): .align 4 cfi_startproc CNAME(ffi_closure_SYSV): + bti c SIGN_LR stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) @@ -305,73 +372,135 @@ L(do_closure): /* Load the return value as directed. */ adr x1, 0f and w0, w0, #AARCH64_RET_MASK - add x1, x1, x0, lsl #3 + add x1, x1, x0, lsl #4 add x3, sp, #16+CALL_CONTEXT_SIZE br x1 - /* Note that each table entry is 2 insns, and thus 8 bytes. 
*/ + /* Note that each table entry is 4 insns, and thus 16 bytes. */ .align 4 -0: b 99f /* VOID */ +0: bti j /* VOID */ + b 99f + nop nop -1: ldr x0, [x3] /* INT64 */ +1: bti j /* INT64 */ + ldr x0, [x3] b 99f -2: ldp x0, x1, [x3] /* INT128 */ + nop +2: bti j /* INT128 */ + ldp x0, x1, [x3] b 99f + nop 3: brk #1000 /* UNUSED */ nop + nop + nop 4: brk #1000 /* UNUSED */ nop + nop + nop 5: brk #1000 /* UNUSED */ nop + nop + nop 6: brk #1000 /* UNUSED */ nop + nop + nop 7: brk #1000 /* UNUSED */ nop -8: ldr s3, [x3, #12] /* S4 */ nop -9: ldr s2, [x3, #8] /* S3 */ nop -10: ldp s0, s1, [x3] /* S2 */ +8: bti j /* S4 */ + ldr s3, [x3, #12] + nop + nop +9: bti j /* S3 */ + ldr s2, [x3, #8] + nop + nop +10: bti j /* S2 */ + ldp s0, s1, [x3] b 99f -11: ldr s0, [x3] /* S1 */ + nop +11: bti j /* S1 */ + ldr s0, [x3] b 99f -12: ldr d3, [x3, #24] /* D4 */ nop -13: ldr d2, [x3, #16] /* D3 */ +12: bti j /* D4 */ + ldr d3, [x3, #24] + nop + nop +13: bti j /* D3 */ + ldr d2, [x3, #16] nop -14: ldp d0, d1, [x3] /* D2 */ + nop +14: bti j /* D2 */ + ldp d0, d1, [x3] b 99f -15: ldr d0, [x3] /* D1 */ + nop +15: bti j /* D1 */ + ldr d0, [x3] b 99f -16: ldr q3, [x3, #48] /* Q4 */ nop -17: ldr q2, [x3, #32] /* Q3 */ +16: bti j /* Q4 */ + ldr q3, [x3, #48] nop -18: ldp q0, q1, [x3] /* Q2 */ + nop +17: bti j /* Q3 */ + ldr q2, [x3, #32] + nop + nop +18: bti j /* Q2 */ + ldp q0, q1, [x3] b 99f -19: ldr q0, [x3] /* Q1 */ + nop +19: bti j /* Q1 */ + ldr q0, [x3] b 99f -20: ldrb w0, [x3, #BE(7)] /* UINT8 */ + nop +20: bti j /* UINT8 */ + ldrb w0, [x3, #BE(7)] b 99f + nop 21: brk #1000 /* reserved */ nop -22: ldrh w0, [x3, #BE(6)] /* UINT16 */ + nop + nop +22: bti j /* UINT16 */ + ldrh w0, [x3, #BE(6)] b 99f + nop 23: brk #1000 /* reserved */ nop -24: ldr w0, [x3, #BE(4)] /* UINT32 */ + nop + nop +24: bti j /* UINT32 */ + ldr w0, [x3, #BE(4)] b 99f + nop 25: brk #1000 /* reserved */ nop -26: ldrsb x0, [x3, #BE(7)] /* SINT8 */ + nop + nop +26: bti j /* SINT8 */ + ldrsb x0, [x3, #BE(7)] b 99f + nop 
27: brk #1000 /* reserved */ nop -28: ldrsh x0, [x3, #BE(6)] /* SINT16 */ + nop + nop +28: bti j /* SINT16 */ + ldrsh x0, [x3, #BE(6)] b 99f + nop 29: brk #1000 /* reserved */ nop -30: ldrsw x0, [x3, #BE(4)] /* SINT32 */ + nop + nop +30: bti j /* SINT32 */ + ldrsw x0, [x3, #BE(4)] + nop nop 31: /* reserved */ 99: ldp x29, x30, [sp], #ffi_closure_SYSV_FS @@ -485,6 +614,7 @@ CNAME(ffi_closure_trampoline_table_page): .align 4 CNAME(ffi_go_closure_SYSV_V): cfi_startproc + bti c stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) cfi_rel_offset (x29, 0) @@ -508,6 +638,7 @@ CNAME(ffi_go_closure_SYSV_V): .align 4 cfi_startproc CNAME(ffi_go_closure_SYSV): + bti c stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) cfi_rel_offset (x29, 0) -- 2.34.1