From: Andrew Stubbs Date: Thu, 8 Jul 2021 14:47:53 +0000 (+0100) Subject: amdgcn: Add -mxnack and -msram-ecc [PR 100208] X-Git-Tag: upstream/12.2.0~6301 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=aad32a00b7d2b64ae158b2b167768a9ae3e20f6e;p=platform%2Fupstream%2Fgcc.git amdgcn: Add -mxnack and -msram-ecc [PR 100208] gcc/ChangeLog: PR target/100208 * config/gcn/gcn-hsa.h (DRIVER_SELF_SPECS): New. (ASM_SPEC): Set -mattr for xnack and sram-ecc. * config/gcn/gcn-opts.h (enum sram_ecc_type): New. * config/gcn/gcn-valu.md: Add a warning comment. * config/gcn/gcn.c (gcn_option_override): Add "sorry" for -mxnack. (output_file_start): Add xnack and sram-ecc state to ".amdgcn_target". * config/gcn/gcn.md: Add a warning comment. * config/gcn/gcn.opt: Add -mxnack and -msram-ecc. * config/gcn/mkoffload.c (EF_AMDGPU_MACH_AMDGCN_GFX908): Remove SRAM-ECC flag. (EF_AMDGPU_XNACK): New. (EF_AMDGPU_SRAM_ECC): New. (elf_flags): New. (copy_early_debug_info): Use elf_flags. (main): Handle -mxnack and -msram-ecc options. * doc/invoke.texi: Document -mxnack and -msram-ecc. gcc/testsuite/ChangeLog: PR target/100208 * gcc.target/gcn/sram-ecc-1.c: New test. * gcc.target/gcn/sram-ecc-2.c: New test. * gcc.target/gcn/sram-ecc-3.c: New test. * gcc.target/gcn/sram-ecc-4.c: New test. * gcc.target/gcn/sram-ecc-5.c: New test. * gcc.target/gcn/sram-ecc-6.c: New test. * gcc.target/gcn/sram-ecc-7.c: New test. * gcc.target/gcn/sram-ecc-8.c: New test. --- diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h index 61cdb31..724e9a3 100644 --- a/gcc/config/gcn/gcn-hsa.h +++ b/gcc/config/gcn/gcn-hsa.h @@ -75,9 +75,15 @@ extern unsigned int gcn_local_sym_hash (const char *name); supported for gcn. */ #define GOMP_SELF_SPECS "" +#define DRIVER_SELF_SPECS \ + "%{march=fiji|march=gfx900|march=gfx906:%{!msram-ecc=*:-msram-ecc=off}}" + /* Use LLVM assembler and linker options. 
*/ #define ASM_SPEC "-triple=amdgcn--amdhsa " \ "%:last_arg(%{march=*:-mcpu=%*}) " \ + "-mattr=%{mxnack:+xnack;:-xnack} " \ + /* FIXME: support "any" when we move to HSACOv4. */ \ + "-mattr=%{!msram-ecc=off:+sram-ecc;:-sram-ecc} " \ "-filetype=obj" #define LINK_SPEC "--pie --export-dynamic" #define LIB_SPEC "-lc" diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h index ed67d01..b255160 100644 --- a/gcc/config/gcn/gcn-opts.h +++ b/gcc/config/gcn/gcn-opts.h @@ -34,4 +34,11 @@ extern int gcn_isa; #define TARGET_GCN5 (gcn_isa == 5) #define TARGET_GCN5_PLUS (gcn_isa >= 5) +enum sram_ecc_type +{ + SRAM_ECC_OFF, + SRAM_ECC_ON, + SRAM_ECC_ANY +}; + #endif diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index beefcf7..84ff675 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -703,6 +703,8 @@ ;; - The address space and glc (volatile) fields are there to replace the ;; fields normally found in a MEM. ;; - Multiple forms of address expression are supported, below. +;; +;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on (define_expand "gather_load" [(match_operand:V_ALL 0 "register_operand") diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c index 6d02a4a..385b90c 100644 --- a/gcc/config/gcn/gcn.c +++ b/gcc/config/gcn/gcn.c @@ -144,6 +144,10 @@ gcn_option_override (void) /* 1MB total. */ stack_size_opt = 1048576; } + + /* The xnack option is a placeholder, for now. */ + if (flag_xnack) + sorry ("XNACK support"); } /* }}} */ @@ -5182,11 +5186,16 @@ output_file_start (void) case PROCESSOR_FIJI: cpu = "gfx803"; break; case PROCESSOR_VEGA10: cpu = "gfx900"; break; case PROCESSOR_VEGA20: cpu = "gfx906"; break; - case PROCESSOR_GFX908: cpu = "gfx908+sram-ecc"; break; + case PROCESSOR_GFX908: cpu = "gfx908"; break; default: gcc_unreachable (); } - fprintf(asm_out_file, "\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s\"\n", cpu); + const char *xnack = (flag_xnack ? 
"+xnack" : ""); + /* FIXME: support "any" when we move to HSACOv4. */ + const char *sram_ecc = (flag_sram_ecc ? "+sram-ecc" : ""); + + fprintf(asm_out_file, "\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s%s%s\"\n", + cpu, xnack, sram_ecc); } /* Implement ASM_DECLARE_FUNCTION_NAME via gcn-hsa.h. diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index ae7249a..8ffa43c 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -569,6 +569,7 @@ (set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12")]) ; 8/16bit move pattern +; TODO: implement combined load and zero_extend, but *only* for -msram-ecc=on (define_insn "*mov_insn" [(set (match_operand:QIHI 0 "nonimmediate_operand" diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt index 767d458..b2b10b0 100644 --- a/gcc/config/gcn/gcn.opt +++ b/gcc/config/gcn/gcn.opt @@ -76,3 +76,24 @@ Target RejectNegative Joined UInteger Var(stack_size_opt) Init(-1) Wopenacc-dims Target Var(warn_openacc_dims) Warning Warn about invalid OpenACC dimensions. + +mxnack +Target Var(flag_xnack) Init(0) +Compile for devices requiring XNACK enabled. Default off. + +Enum +Name(sram_ecc_type) Type(enum sram_ecc_type) +SRAM-ECC modes: + +EnumValue +Enum(sram_ecc_type) String(off) Value(SRAM_ECC_OFF) + +EnumValue +Enum(sram_ecc_type) String(on) Value(SRAM_ECC_ON) + +EnumValue +Enum(sram_ecc_type) String(any) Value(SRAM_ECC_ANY) + +msram-ecc= +Target RejectNegative Joined ToLower Enum(sram_ecc_type) Var(flag_sram_ecc) Init(SRAM_ECC_ANY) +Compile for devices with the SRAM ECC feature enabled, or not. Default \"any\". diff --git a/gcc/config/gcn/mkoffload.c b/gcc/config/gcn/mkoffload.c index 1469a68..804cc26 100644 --- a/gcc/config/gcn/mkoffload.c +++ b/gcc/config/gcn/mkoffload.c @@ -52,7 +52,10 @@ #undef EF_AMDGPU_MACH_AMDGCN_GFX906 #define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f #undef EF_AMDGPU_MACH_AMDGCN_GFX908 -#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x230 // Assume SRAM-ECC enabled. 
+#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30 + +#define EF_AMDGPU_XNACK 0x100 +#define EF_AMDGPU_SRAM_ECC 0x200 #ifndef R_AMDGPU_NONE #define R_AMDGPU_NONE 0 @@ -77,6 +80,7 @@ static struct obstack files_to_cleanup; enum offload_abi offload_abi = OFFLOAD_ABI_UNSET; uint32_t elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803; // Default GPU architecture. +uint32_t elf_flags = 0; /* Delete tempfiles. */ @@ -298,7 +302,7 @@ copy_early_debug_info (const char *infile, const char *outfile) ehdr.e_ident[8] = ELFABIVERSION_AMDGPU_HSA; ehdr.e_type = ET_REL; ehdr.e_machine = EM_AMDGPU; - ehdr.e_flags = elf_arch; + ehdr.e_flags = elf_arch | elf_flags; /* Load the section headers so we can walk them later. */ Elf64_Shdr *sections = (Elf64_Shdr *)xmalloc (sizeof (Elf64_Shdr) @@ -823,6 +827,7 @@ main (int argc, char **argv) bool fopenacc = false; bool fPIC = false; bool fpic = false; + bool sram_seen = false; for (int i = 1; i < argc; i++) { #define STR "-foffload-abi=" @@ -845,6 +850,26 @@ main (int argc, char **argv) fPIC = true; else if (strcmp (argv[i], "-fpic") == 0) fpic = true; + else if (strcmp (argv[i], "-mxnack") == 0) + elf_flags |= EF_AMDGPU_XNACK; + else if (strcmp (argv[i], "-mno-xnack") == 0) + elf_flags &= ~EF_AMDGPU_XNACK; + else if (strcmp (argv[i], "-msram-ecc=on") == 0) + { + elf_flags |= EF_AMDGPU_SRAM_ECC; + sram_seen = true; + } + else if (strcmp (argv[i], "-msram-ecc=any") == 0) + { + /* FIXME: change this when we move to HSACOv4. */ + elf_flags |= EF_AMDGPU_SRAM_ECC; + sram_seen = true; + } + else if (strcmp (argv[i], "-msram-ecc=off") == 0) + { + elf_flags &= ~EF_AMDGPU_SRAM_ECC; + sram_seen = true; + } else if (strcmp (argv[i], "-save-temps") == 0) save_temps = true; else if (strcmp (argv[i], "-v") == 0) @@ -865,6 +890,21 @@ main (int argc, char **argv) if (!(fopenacc ^ fopenmp)) fatal_error (input_location, "either -fopenacc or -fopenmp must be set"); + /* The SRAM-ECC feature defaults to "any" on GPUs where the feature is + available. 
*/ + if (!sram_seen) + switch (elf_arch) + { + case EF_AMDGPU_MACH_AMDGCN_GFX803: + case EF_AMDGPU_MACH_AMDGCN_GFX900: + case EF_AMDGPU_MACH_AMDGCN_GFX906: + break; + default: + /* FIXME: change this when we move to HSACOv4. */ + elf_flags |= EF_AMDGPU_SRAM_ECC; + break; + } + const char *abi; switch (offload_abi) { @@ -892,6 +932,12 @@ main (int argc, char **argv) obstack_ptr_grow (&cc_argv_obstack, "-xlto"); if (fopenmp) obstack_ptr_grow (&cc_argv_obstack, "-mgomp"); + obstack_ptr_grow (&cc_argv_obstack, + (elf_flags & EF_AMDGPU_XNACK + ? "-mxnack" : "-mno-xnack")); + obstack_ptr_grow (&cc_argv_obstack, + (elf_flags & EF_AMDGPU_SRAM_ECC + ? "-msram-ecc=on" : "-msram-ecc=off")); for (int ix = 1; ix != argc; ix++) { @@ -993,6 +1039,14 @@ main (int argc, char **argv) } obstack_ptr_grow (&ld_argv_obstack, gcn_s2_name); obstack_ptr_grow (&ld_argv_obstack, "-lgomp"); + obstack_ptr_grow (&ld_argv_obstack, + (elf_flags & EF_AMDGPU_XNACK + ? "-mxnack" : "-mno-xnack")); + obstack_ptr_grow (&ld_argv_obstack, + (elf_flags & EF_AMDGPU_SRAM_ECC + ? "-msram-ecc=on" : "-msram-ecc=off")); + if (verbose) + obstack_ptr_grow (&ld_argv_obstack, "-v"); for (int i = 1; i < argc; i++) if (startswith (argv[i], "-l") diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index b16176e..32697e6 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -18847,6 +18847,15 @@ Compile for GCN5 Vega 20 devices (gfx906). @end table +@item -msram-ecc=on +@itemx -msram-ecc=off +@itemx -msram-ecc=any +@opindex msram-ecc +Compile binaries suitable for devices with the SRAM-ECC feature enabled, +disabled, or either mode. This feature can be enabled per-process on some +devices. The compiled code must match the device mode. The default is +@samp{any}, for devices that support it. + @item -mstack-size=@var{bytes} @opindex mstack-size Specify how many @var{bytes} of stack space will be requested for each GPU @@ -18855,6 +18864,14 @@ available. 
The size of the stack allocation may also have an impact on run-time performance. The default is 32KB when using OpenACC or OpenMP, and 1MB otherwise. +@item -mxnack +@opindex mxnack +Compile binaries suitable for devices with the XNACK feature enabled. Some +devices always require XNACK and some allow the user to configure XNACK. The +compiled code must match the device mode. The default is @samp{-mno-xnack}. +At present this option is a placeholder for support that is not yet +implemented. + @end table @node ARC Options diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-1.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-1.c new file mode 100644 index 0000000..d46c302 --- /dev/null +++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-1.c @@ -0,0 +1,17 @@ +/* Ensure that explicit zero-extend instructions are present when compiling + for targets without sram-ecc enabled (in which sub-dword loads do not + zero the high bits of the target register). */ + +/* { dg-do compile } */ +/* { dg-options "-O2 -msram-ecc=off" } */ + +extern unsigned char c; + +unsigned int +f () +{ + return c; +} + +/* { dg-final { scan-assembler "lshl.* 24" } } */ +/* { dg-final { scan-assembler "lshr.* 24" } } */ diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-2.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-2.c new file mode 100644 index 0000000..351d43c --- /dev/null +++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-2.c @@ -0,0 +1,17 @@ +/* Ensure that explicit zero-extend instructions are present when compiling + for targets without sram-ecc enabled (in which sub-dword loads do not + zero the high bits of the target register). 
*/ + +/* { dg-do compile } */ +/* { dg-options "-O2 -msram-ecc=off" } */ + +extern unsigned short s; + +unsigned short +f () +{ + return s; +} + +/* { dg-final { scan-assembler "lshl.* 16" } } */ +/* { dg-final { scan-assembler "lshr.* 16" } } */ diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-3.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-3.c new file mode 100644 index 0000000..692d457 --- /dev/null +++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-3.c @@ -0,0 +1,21 @@ +/* Ensure that explicit zero-extend instructions are present when compiling + for targets without sram-ecc enabled (in which sub-dword loads do not + zero the high bits of the target register). */ + +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=off" } */ + +typedef unsigned int v64si __attribute__ ((vector_size (64*4))); +typedef unsigned char v64qi __attribute__ ((vector_size (64*1))); + +extern v64si a; +extern v64qi b; + +void +f () +{ + for (int n = 0; n < 64; n++) + a[n] = b[n]; +} + +/* { dg-final { scan-assembler "zero_extendv64qiv64si2" } } */ diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-4.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-4.c new file mode 100644 index 0000000..61b8d55 --- /dev/null +++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-4.c @@ -0,0 +1,21 @@ +/* Ensure that explicit zero-extend instructions are present when compiling + for targets without sram-ecc enabled (in which sub-dword loads do not + zero the high bits of the target register). 
*/ + +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=off" } */ + +typedef unsigned int v64si __attribute__ ((vector_size (64*4))); +typedef unsigned short v64hi __attribute__ ((vector_size (64*2))); + +extern v64si a; +extern v64hi b; + +void +f () +{ + for (int n = 0; n < 64; n++) + a[n] = b[n]; +} + +/* { dg-final { scan-assembler "zero_extendv64hiv64si2" } } */ diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-5.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-5.c new file mode 100644 index 0000000..4f0543b --- /dev/null +++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-5.c @@ -0,0 +1,17 @@ +/* Ensure that explicit zero-extend instructions are present when compiling + for targets that may not have sram-ecc enabled (in which sub-dword loads do + not zero the high bits of the target register). */ + +/* { dg-do compile } */ +/* { dg-options "-O2 -msram-ecc=any" } */ + +extern unsigned char c; + +unsigned int +f () +{ + return c; +} + +/* { dg-final { scan-assembler "lshl.* 24" } } */ +/* { dg-final { scan-assembler "lshr.* 24" } } */ diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-6.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-6.c new file mode 100644 index 0000000..9dfceaf --- /dev/null +++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-6.c @@ -0,0 +1,17 @@ +/* Ensure that explicit zero-extend instructions are present when compiling + for targets that may not have sram-ecc enabled (in which sub-dword loads do + not zero the high bits of the target register). 
*/ + +/* { dg-do compile } */ +/* { dg-options "-O2 -msram-ecc=any" } */ + +extern unsigned short s; + +unsigned short +f () +{ + return s; +} + +/* { dg-final { scan-assembler "lshl.* 16" } } */ +/* { dg-final { scan-assembler "lshr.* 16" } } */ diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-7.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-7.c new file mode 100644 index 0000000..9d0ce6f --- /dev/null +++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-7.c @@ -0,0 +1,21 @@ +/* Ensure that explicit zero-extend instructions are present when compiling + for targets that may not have sram-ecc enabled (in which sub-dword loads do + not zero the high bits of the target register). */ + +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=any" } */ + +typedef unsigned int v64si __attribute__ ((vector_size (64*4))); +typedef unsigned char v64qi __attribute__ ((vector_size (64*1))); + +extern v64si a; +extern v64qi b; + +void +f () +{ + for (int n = 0; n < 64; n++) + a[n] = b[n]; +} + +/* { dg-final { scan-assembler "zero_extendv64qiv64si2" } } */ diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-8.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-8.c new file mode 100644 index 0000000..76e0288 --- /dev/null +++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-8.c @@ -0,0 +1,21 @@ +/* Ensure that explicit zero-extend instructions are present when compiling + for targets that may not have sram-ecc enabled (in which sub-dword loads do + not zero the high bits of the target register). */ + +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=any" } */ + +typedef unsigned int v64si __attribute__ ((vector_size (64*4))); +typedef unsigned short v64hi __attribute__ ((vector_size (64*2))); + +extern v64si a; +extern v64hi b; + +void +f () +{ + for (int n = 0; n < 64; n++) + a[n] = b[n]; +} + +/* { dg-final { scan-assembler "zero_extendv64hiv64si2" } } */