From ce548aa236962f95ccaf59f8692ed0861f3769dd Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 31 Mar 2021 14:14:13 +0300 Subject: [PATCH] [X86] AMD Zen 3 has macro fusion This is an improvement over Zen 2, where only branch fusion is supported, as per Agner, 21.4 Instruction fusion. AMD SOG 17h has no mention of fusion. AMD SOG 19h, 2.9.3 Branch Fusion The following flag writing instructions support branch fusion with their reg/reg, reg/imm and reg/mem forms * CMP * TEST * SUB * ADD * INC (no fusion with branches dependent on CF) * DEC (no fusion with branches dependent on CF) * OR * AND * XOR Agner, 22.4 Instruction fusion <...> This applies to CMP, TEST, ADD, SUB, AND, OR, XOR, INC, DEC and all conditional jumps, except if the arithmetic or logic instruction has a rip-relative address or both an address displacement and an immediate operand. --- llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 2 ++ llvm/lib/Target/X86/X86.td | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 4db1bfc..58e233d 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -115,6 +115,7 @@ namespace X86 { Cmp, // AND And, + // FIXME: Zen 3 supports branch fusion for OR/XOR. // ADD, SUB AddSub, // INC, DEC @@ -183,6 +184,7 @@ namespace X86 { case X86::AND8rr: case X86::AND8rr_REV: return FirstMacroFusionInstKind::And; + // FIXME: Zen 3 supports branch fusion for OR/XOR. 
// CMP case X86::CMP16i16: case X86::CMP16mr: diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 60c89b1..7acfc3c 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1090,7 +1090,9 @@ def ProcessorFeatures { FeaturePKU, FeatureVAES, FeatureVPCLMULQDQ]; - list<SubtargetFeature> ZN3Tuning = ZNTuning; + list<SubtargetFeature> ZN3AdditionalTuning = [FeatureMacroFusion]; + list<SubtargetFeature> ZN3Tuning = + !listconcat(ZNTuning, ZN3AdditionalTuning); list<SubtargetFeature> ZN3Features = !listconcat(ZN2Features, ZN3AdditionalFeatures); } -- 2.7.4