From 997070498b0d713ecfb384dc12d1e68ebdbee5bd Mon Sep 17 00:00:00 2001
From: Richard Earnshaw <rearnsha@arm.com>
Date: Fri, 14 Jan 2022 11:38:33 +0000
Subject: [PATCH] arm: elide some cases where the AES erratum workaround is not required.

Some common cases where the AES erratum workaround is not required
are 64- or 128-bit loads from memory, moves of a 128-bit value from
core registers, and loads of a 128-bit constant from a literal pool.
The loads may also be misaligned or generated via a Neon intrinsic
function.

gcc/ChangeLog:

	* config/arm/crypto.md (aes_op_protect): Allow moves from core
	registers and from memory.
	(aes_op_protect_misalign_load): New pattern.
	(aes_op_protect_neon_vld1v16qi): New pattern.
---
 gcc/config/arm/crypto.md | 55 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 47 insertions(+), 8 deletions(-)

diff --git a/gcc/config/arm/crypto.md b/gcc/config/arm/crypto.md
index df85735..4c78507 100644
--- a/gcc/config/arm/crypto.md
+++ b/gcc/config/arm/crypto.md
@@ -62,17 +62,56 @@
   [(set_attr "type" "<crypto_type>")]
 )
 
-; Mitigate against AES erratum on Cortex-A57 and Cortex-A72 by performing
-; a 128-bit operation on an operand producer.  This can be eliminated only
-; if we know that the operand was produced by a full-width operation.
-; V16QImode matches <crypto_mode> for the AES instructions.
+;; Mitigate against AES erratum on Cortex-A57 and Cortex-A72 by
+;; performing a 128-bit operation on an operand producer.  This can be
+;; eliminated only if we know that the operand was produced by a
+;; full-width operation.  V16QImode matches <crypto_mode> for the AES
+;; instructions.  Handle some very common cases where the source is
+;; known to be safe (transfers from core registers and memory).
 (define_insn "aes_op_protect"
-  [(set (match_operand:V16QI 0 "register_operand" "=w")
-	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")]
+  [(set (match_operand:V16QI 0 "register_operand" "=w,w,w")
+	(unspec:V16QI [(match_operand:V16QI 1 "general_operand" "w,r,Uni")]
+		      UNSPEC_AES_PROTECT))]
+  "TARGET_CRYPTO && fix_aes_erratum_1742098"
+  {
+    switch (which_alternative)
+      {
+      case 0: return "vmov\t%q0, %q1";
+      case 1: return "vmov\t%e0, %Q1, %R1  @ V16QI\;vmov\t%f0, %J1, %K1";
+      case 2: return output_move_neon (operands);
+      default: gcc_unreachable ();
+      }
+  }
+  [(set_attr "type" "neon_move_q,neon_from_gp_q,neon_load1_4reg")
+   (set_attr "length" "4,8,8")
+   (set_attr "arm_pool_range" "*,*,1020")
+   (set_attr "thumb2_pool_range" "*,*,1018")
+   (set_attr "neg_pool_range" "*,*,996")]
+)
+
+;; Another safe case is when a movmisalign load is used as the source.
+(define_insn "*aes_op_protect_misalign_load"
+  [(set (match_operand:V16QI 0 "s_register_operand" "=w")
+	(unspec:V16QI
+	 [(unspec:V16QI
+	   [(match_operand:V16QI 1 "neon_permissive_struct_operand" "Um")]
+	   UNSPEC_MISALIGNED_ACCESS)]
 		      UNSPEC_AES_PROTECT))]
   "TARGET_CRYPTO && fix_aes_erratum_1742098"
-  "vmov\\t%q0, %q1"
-  [(set_attr "type" "neon_move_q")]
+  "vld1.8\t%{q0}, %A1"
+  [(set_attr "type" "neon_load1_1reg_q")]
+)
+
+;; Similarly for the vld1 intrinsic
+(define_insn "aes_op_protect_neon_vld1v16qi"
+  [(set (match_operand:V16QI 0 "s_register_operand" "=w")
+	(unspec:V16QI
+	 [(unspec:V16QI [(match_operand:V16QI 1 "neon_struct_operand" "Um")]
+			UNSPEC_VLD1)]
+	 UNSPEC_AES_PROTECT))]
+  "TARGET_NEON"
+  "vld1.8\t%h0, %A1"
+  [(set_attr "type" "neon_load1_1reg_q")]
 )
 
 ;; An AESMC operation can feed directly into a subsequent AES
-- 
2.7.4
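
For illustration, here is a minimal C sketch (not part of the patch; the
function name and compile flags are assumptions) of the kind of source the
new patterns treat as known-safe.  The vld1q_u8 result is produced by a
full-width 128-bit load, so with this change the protecting vmov that the
erratum workaround would otherwise insert before vaeseq_u8 can be elided:

#include <arm_neon.h>

/* Build for 32-bit Arm with the crypto extensions available, e.g.
   -march=armv8-a+crypto -mfloat-abi=hard, with the Cortex-A57/A72
   AES erratum workaround enabled.  */
uint8x16_t
aes_round (const uint8_t *in, uint8x16_t key)
{
  /* A full-width 128-bit load: a known-safe operand producer, matched
     by the new aes_op_protect alternatives and vld1 patterns.  */
  uint8x16_t state = vld1q_u8 (in);
  return vaesmcq_u8 (vaeseq_u8 (state, key));
}

By contrast, an input that may have been produced by a narrower (64-bit)
write still needs the protecting 128-bit vmov emitted by aes_op_protect.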