From: Ganesh Gopalasubramanian Date: Tue, 26 Feb 2019 16:55:10 +0000 (+0000) Subject: [X86] AMD znver2 enablement X-Git-Tag: llvmorg-10-init~11187 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e172d7008d0c12eaa9ee2aee16ee4e13c176553c;p=platform%2Fupstream%2Fllvm.git [X86] AMD znver2 enablement This patch enables the following 1) AMD family 17h "znver2" tune flag (-march, -mcpu). 2) ISAs that are enabled for "znver2" architecture. 3) For the time being, it uses the znver1 scheduler model. 4) Tests are updated. 5) Scheduler descriptions are yet to be put in place. Reviewers: craig.topper Differential Revision: https://reviews.llvm.org/D58343 llvm-svn: 354897 --- diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def index fb82228..2b5936b 100644 --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -98,6 +98,7 @@ X86_CPU_SUBTYPE_COMPAT("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512, "skylake-a X86_CPU_SUBTYPE_COMPAT("cannonlake", INTEL_COREI7_CANNONLAKE, "cannonlake") X86_CPU_SUBTYPE_COMPAT("icelake-client", INTEL_COREI7_ICELAKE_CLIENT, "icelake-client") X86_CPU_SUBTYPE_COMPAT("icelake-server", INTEL_COREI7_ICELAKE_SERVER, "icelake-server") +X86_CPU_SUBTYPE_COMPAT("znver2", AMDFAM17H_ZNVER2, "znver2") // Entries below this are not in libgcc/compiler-rt. X86_CPU_SUBTYPE ("core2", INTEL_CORE2_65) X86_CPU_SUBTYPE ("penryn", INTEL_CORE2_45) diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index 35bc972..52e7080 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -916,7 +916,14 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, break; // "btver2" case 23: *Type = X86::AMDFAM17H; - *Subtype = X86::AMDFAM17H_ZNVER1; + if (Model >= 0x30 && Model <= 0x3f) { + *Subtype = X86::AMDFAM17H_ZNVER2; + break; // "znver2"; 30h-3fh: Zen2 + } + if (Model <= 0x0f) { + *Subtype = X86::AMDFAM17H_ZNVER1; + break; // "znver1"; 00h-0Fh: Zen1 + } break; default: break; // "generic" diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 502278b..0947900 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1143,8 +1143,8 @@ def : Proc<"bdver4", [ FeatureMacroFusion ]>; -// Znver1 -def: ProcessorModel<"znver1", Znver1Model, [ +// AMD Zen Processors common ISAs +def ZNFeatures : ProcessorFeatures<[], [ FeatureADX, FeatureAES, FeatureAVX2, @@ -1183,6 +1183,19 @@ def: ProcessorModel<"znver1", Znver1Model, [ FeatureXSAVEOPT, FeatureXSAVES]>; +class Znver1Proc : ProcModel; +def : Znver1Proc<"znver1">; + +class Znver2Proc : ProcModel; +def : Znver2Proc<"znver2">; + def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>; def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; diff --git a/llvm/test/CodeGen/X86/cpus-amd.ll b/llvm/test/CodeGen/X86/cpus-amd.ll index 5e90048..c5716d6 100644 --- a/llvm/test/CodeGen/X86/cpus-amd.ll +++ b/llvm/test/CodeGen/X86/cpus-amd.ll @@ -26,6 +26,7 @@ ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty define void @foo() { ret void diff --git a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll index 90abd2d..8d43a1b 100644 --- a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll +++ b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll @@ -5,6 +5,8 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s ; Test one 32-bit input, output is 32-bit, no transformations expected. define i32 @test_zext_cmp0(i32 %a) { diff --git a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll index 54c248f..f2c7c2f 100644 --- a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll +++ b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll @@ -47,6 +47,7 @@ ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=FAST ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=FAST ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=FAST +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=FAST ; Other chips with slow unaligned memory accesses diff --git a/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll b/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll index 10cb184..513e777 100644 --- a/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll +++ b/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll @@ -13,8 +13,9 @@ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver3 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s -; Verify that for the X86_64 processors that are known to have poor latency +; Verify that for the X86_64 processors that are known to have poor latency ; double precision shift instructions we do not generate 'shld' or 'shrd' ; instructions. @@ -25,7 +26,7 @@ define i64 @lshift(i64 %a, i64 %b, i32 %c) nounwind readnone { entry: -; CHECK-NOT: shld +; CHECK-NOT: shld %sh_prom = zext i32 %c to i64 %shl = shl i64 %a, %sh_prom %sub = sub nsw i32 64, %c