From b0a16e6d4c91120dd9a2900da0831e83e65f2046 Mon Sep 17 00:00:00 2001
From: Ghjuvan Lacambre <lacambre@adacore.com>
Date: Thu, 30 Jan 2020 11:47:00 +0100
Subject: [PATCH] [Ada] Implement initialization of CUDA runtime

gcc/ada/

	* debug.adb: Document -gnatd_c flag as being used for CUDA.
	* gnat_cuda.ads: New file.
	* gnat_cuda.adb: New file.
	* rtsfind.ads: Add Interfaces_C_Strings package and
	RE_Fatbin_Wrapper, RE_Register_Fat_Binary,
	RE_Register_Fat_Binary_End, RE_Register_Function, RE_Chars_Ptr,
	RE_New_Char_Array entities.
	* rtsfind.adb: Create new Interfaces_C_Descendant subtype,
	handle it.
	* sem_ch7.adb (Analyze_Package_Body_Helper): Call CUDA init
	procedure.
	* sem_prag.adb (Analyze_Pragma): Call Add_Cuda_Kernel procedure.
	* gcc-interface/Make-lang.in (GNAT_ADA_OBJS): Add gnat_cuda.o.
---
 gcc/ada/debug.adb                  |   2 +-
 gcc/ada/gcc-interface/Make-lang.in |   1 +
 gcc/ada/gnat_cuda.adb              | 586 +++++++++++++++++++++++++++++++++++++
 gcc/ada/gnat_cuda.ads              | 107 +++++++
 gcc/ada/rtsfind.adb                |   9 +-
 gcc/ada/rtsfind.ads                |  18 ++
 gcc/ada/sem_ch7.adb                |   8 +
 gcc/ada/sem_prag.adb               |   2 +
 8 files changed, 731 insertions(+), 2 deletions(-)
 create mode 100644 gcc/ada/gnat_cuda.adb
 create mode 100644 gcc/ada/gnat_cuda.ads

diff --git a/gcc/ada/debug.adb b/gcc/ada/debug.adb
index bbdaf3b..4eb3d5b 100644
--- a/gcc/ada/debug.adb
+++ b/gcc/ada/debug.adb
@@ -147,7 +147,7 @@ package body Debug is
 
    --  d_a  Stop elaboration checks on accept or select statement
    --  d_b
-   --  d_c
+   --  d_c  CUDA compilation : compile for the host
    --  d_d
    --  d_e  Ignore entry calls and requeue statements for elaboration
    --  d_f  Issue info messages related to GNATprove usage
diff --git a/gcc/ada/gcc-interface/Make-lang.in b/gcc/ada/gcc-interface/Make-lang.in
index d950217..78fe602 100644
--- a/gcc/ada/gcc-interface/Make-lang.in
+++ b/gcc/ada/gcc-interface/Make-lang.in
@@ -327,6 +327,7 @@ GNAT_ADA_OBJS =	\
  ada/libgnat/g-u3spch.o	\
  ada/get_targ.o	\
  ada/ghost.o	\
+ ada/gnat_cuda.o \
  ada/libgnat/gnat.o	\
  ada/gnatvsn.o	\
  ada/hostparm.o	\
diff --git a/gcc/ada/gnat_cuda.adb b/gcc/ada/gnat_cuda.adb
new file mode 100644
index 0000000..fef0d18
--- /dev/null
+++ b/gcc/ada/gnat_cuda.adb
@@ -0,0 +1,586 @@
+------------------------------------------------------------------------------
+--                                                                          --
+--                         GNAT COMPILER COMPONENTS                         --
+--                                                                          --
+--                                 C U D A                                  --
+--                                                                          --
+--                                 B o d y                                  --
+--                                                                          --
+--          Copyright (C) 2010-2020, Free Software Foundation, Inc.         --
+--                                                                          --
+-- GNAT is free software;  you can  redistribute it  and/or modify it under --
+-- terms of the  GNU General Public License as published  by the Free Soft- --
+-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
+-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
+-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
+-- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
+-- for  more details.  You should have  received  a copy of the GNU General --
+-- Public License  distributed with GNAT; see file COPYING3.  If not, go to --
+-- http://www.gnu.org/licenses for a complete copy of the license.          --
+--                                                                          --
+-- GNAT was originally developed  by the GNAT team at  New York University. --
+-- Extensive contributions were provided by Ada Core Technologies Inc.      --
+--                                                                          --
+------------------------------------------------------------------------------
+
+--  This package defines CUDA-specific datastructures and functions.
+
+with Atree;    use Atree;
+with Debug;    use Debug;
+with Elists;   use Elists;
+with Namet;    use Namet;
+with Nlists;   use Nlists;
+with Nmake;    use Nmake;
+with Rtsfind;  use Rtsfind;
+with Sinfo;    use Sinfo;
+with Stringt;  use Stringt;
+with Tbuild;   use Tbuild;
+with Uintp;    use Uintp;
+with Sem;      use Sem;
+with Sem_Util; use Sem_Util;
+with Snames;   use Snames;
+
+with GNAT.HTable;
+
+package body GNAT_CUDA is
+
+   --------------------------------------
+   -- Hash Table for CUDA_Global nodes --
+   --------------------------------------
+
+   type Hash_Range is range 0 .. 510;
+   --  Size of hash table headers
+
+   function Hash (F : Entity_Id) return Hash_Range;
+   --  Hash function for hash table
+
+   package CUDA_Kernels_Table is new
+     GNAT.HTable.Simple_HTable
+       (Header_Num => Hash_Range,
+        Element    => Elist_Id,
+        No_Element => No_Elist,
+        Key        => Entity_Id,
+        Hash       => Hash,
+        Equal      => "=");
+   --  The keys of this table are package entities whose bodies contain at
+   --  least one procedure marked with aspect CUDA_Global. The values are
+   --  Elists of the marked procedures.
+
+   function Get_CUDA_Kernels (Pack_Id : Entity_Id) return Elist_Id;
+   --  Returns an Elist of all procedures marked with pragma CUDA_Global that
+   --  are declared within package body Pack_Body. Returns No_Elist if
+   --  Pack_Id does not contain such procedures.
+
+   procedure Set_CUDA_Kernels
+     (Pack_Id : Entity_Id;
+      Kernels : Elist_Id);
+   --  Stores Kernels as the list of kernels belonging to the package entity
+   --  Pack_Id. Pack_Id must not have a list of kernels.
+
+   ---------------------
+   -- Add_CUDA_Kernel --
+   ---------------------
+
+   procedure Add_CUDA_Kernel
+     (Pack_Id : Entity_Id;
+      Kernel  : Entity_Id)
+   is
+      Kernels : Elist_Id := Get_CUDA_Kernels (Pack_Id);
+   begin
+      if Kernels = No_Elist then
+         Kernels := New_Elmt_List;
+         Set_CUDA_Kernels (Pack_Id, Kernels);
+      end if;
+      Append_Elmt (Kernel, Kernels);
+   end Add_CUDA_Kernel;
+
+   ----------
+   -- Hash --
+   ----------
+
+   function Hash (F : Entity_Id) return Hash_Range is
+   begin
+      return Hash_Range (F mod 511);
+   end Hash;
+
+   ----------------------
+   -- Get_CUDA_Kernels --
+   ----------------------
+
+   function Get_CUDA_Kernels (Pack_Id : Entity_Id) return Elist_Id is
+   begin
+      return CUDA_Kernels_Table.Get (Pack_Id);
+   end Get_CUDA_Kernels;
+
+   ------------------------------------------
+   -- Build_And_Insert_CUDA_Initialization --
+   ------------------------------------------
+
+   procedure Build_And_Insert_CUDA_Initialization (N : Node_Id) is
+
+      --  For the following kernel declaration:
+      --
+      --  package body <Package_Name> is
+      --     procedure <Proc_Name> (X : Integer) with CUDA_Global;
+      --  end package;
+      --
+      --  Insert the following declarations:
+      --
+      --     Fat_Binary : System.Address;
+      --     pragma Import
+      --        (Convention    => C,
+      --         Entity        => Fat_Binary,
+      --         External_Name => "_binary_<Package_Name>_fatbin_start");
+      --
+      --     Wrapper : Fatbin_Wrapper :=
+      --       (16#466243b1#, 1, Fat_Binary'Address, System.Null_Address);
+      --
+      --     Proc_Symbol_Name : Interfaces.C.Strings.Chars_Ptr :=
+      --       Interfaces.C.Strings.New_Char_Array("<Proc_Name>");
+      --
+      --     Fat_Binary_Handle : System.Address :=
+      --       CUDA.Internal.Register_Fat_Binary (Wrapper'Address);
+      --
+      --     procedure Initialize_CUDA_Kernel is
+      --     begin
+      --        CUDA.Internal.Register_Function
+      --           (Fat_Binary_Handle,
+      --            <Proc_Name>'Address,
+      --            Proc_Symbol_Name,
+      --            Proc_Symbol_Name,
+      --            -1,
+      --            System.Null_Address,
+      --            System.Null_Address,
+      --            System.Null_Address,
+      --            System.Null_Address,
+      --            System.Null_Address);
+      --        CUDA.Internal.Register_Fat_Binary_End (Fat_Binary_Handle);
+      --     end Initialize_CUDA_Kernel;
+      --
+      --  Proc_Symbol_Name is the name of the procedure marked with
+      --  CUDA_Global. The CUDA runtime uses this in order to be able to find
+      --  kernels in the fat binary, so it has to match the name of the
+      --  procedure symbol compiled by GNAT_LLVM. When looking at the code
+      --  generated by NVCC, it seems that the CUDA runtime also needs the name
+      --  of the procedure symbol of the host. Fortuantely, the procedures are
+      --  named the same way whether they are compiled for the host or the
+      --  device, so we use Vector_Add_Name to specify the name of the symbol
+      --  for both the host and the device. The meaning of the rest of the
+      --  arguments is unknown.
+
+      function Build_CUDA_Init_Proc
+        (Init_Id      : Entity_Id;
+         CUDA_Kernels : Elist_Id;
+         Handle_Id    : Entity_Id;
+         Pack_Decls   : List_Id) return Node_Id;
+      --  Create the declaration of Init_Id, the function that binds each
+      --  kernel present in CUDA_Kernels with the fat binary Handle_Id and then
+      --  tells the CUDA runtime that no new function will be bound to the fat
+      --  binary.
+
+      function Build_Fat_Binary_Declaration
+        (Bin_Id : Entity_Id) return Node_Id;
+      --  Create a declaration for Bin_Id, the entity that represents the fat
+      --  binary, i.e.:
+      --
+      --    Bin_Id : System.Address;
+
+      function Build_Fat_Binary_Handle_Declaration
+        (Handle_Id  : Entity_Id;
+         Wrapper_Id : Entity_Id) return Node_Id;
+      --  Create the declaration of Handle_Id, a System.Address that will
+      --  receive the results of passing the address of Wrapper_Id to
+      --  CUDA.Register_Fat_Binary, i.e.:
+      --
+      --    Handle_Id : System.Address :=
+      --      CUDA.Register_Fat_Binary (Wrapper_Id'Address)
+
+      function Build_Fat_Binary_Wrapper_Declaration
+        (Wrapper_Id : Entity_Id;
+         Bin_Id     : Entity_Id) return Node_Id;
+      --  Create the declaration of the fat binary wrapper Wrapper_Id, which
+      --  holds magic numbers and Bin_Id'Address, i.e.:
+      --
+      --     Wrapper_Id : System.Address :=
+      --       (16#466243b1#, 1, Bin_Id'Address, System.Null_Address);
+
+      function Build_Import_Pragma
+        (Bin_Id    : Entity_Id;
+         Pack_Body : Node_Id) return Node_Id;
+      --  Create a pragma that will bind the fat binary Bin_Id to its external
+      --  symbol. N is the package body Bin_Id belongs to, i.e.:
+      --
+      --     pragma Import
+      --        (Convention    => C,
+      --         Entity        => Bin_Id,
+      --         External_Name => "_binary_<Pack_Body's name>_fatbin_start");
+
+      function Build_Kernel_Name_Declaration
+        (Kernel : Entity_Id) return Node_Id;
+      --  Create the declaration of a C string that contains the name of
+      --  Kernel's symbol, i.e.:
+      --
+      --     Kernel : Interfaces.C.Strings.Chars_Ptr :=
+      --       Interfaces.C.Strings.New_Char_Array("<Kernel's name>");
+
+      function Build_Register_Function_Call
+        (Loc         : Source_Ptr;
+         Bin         : Entity_Id;
+         Kernel      : Entity_Id;
+         Kernel_Name : Entity_Id) return Node_Id;
+      --  Return a call to CUDA.Internal.Register_Function that binds Kernel
+      --  (the entity of a procedure) to the symbol described by the C string
+      --  Kernel_Name in the fat binary Bin, using Loc as location.
+
+      --------------------------
+      -- Build_CUDA_Init_Proc --
+      --------------------------
+
+      function Build_CUDA_Init_Proc
+        (Init_Id      : Entity_Id;
+         CUDA_Kernels : Elist_Id;
+         Handle_Id    : Entity_Id;
+         Pack_Decls   : List_Id) return Node_Id
+      is
+         Loc : constant Source_Ptr := Sloc (Init_Id);
+
+         Stmts : constant List_Id := New_List;
+         --  List of statements that will be used by the cuda initialization
+         --  function.
+
+         New_Stmt : Node_Id;
+         --  Temporary variable to hold the various newly-created nodes.
+
+         Kernel_Elmt : Elmt_Id;
+         Kernel_Id   : Entity_Id;
+
+      begin
+         --  For each CUDA_Global function, declare a C string that holds
+         --  its symbol's name (i.e. packagename __ functionname).
+
+         --  Also create a function call to CUDA.Internal.Register_Function
+         --  that takes the declared C string, a pointer to the function and
+         --  the fat binary handle.
+
+         Kernel_Elmt := First_Elmt (CUDA_Kernels);
+         while Present (Kernel_Elmt) loop
+            Kernel_Id := Node (Kernel_Elmt);
+
+            New_Stmt :=
+              Build_Kernel_Name_Declaration (Kernel_Id);
+            Append (New_Stmt, Pack_Decls);
+            Analyze (New_Stmt);
+
+            Append_To (Stmts,
+              Build_Register_Function_Call (Loc,
+                Bin         => Handle_Id,
+                Kernel      => Kernel_Id,
+                Kernel_Name => Defining_Entity (New_Stmt)));
+
+            Next_Elmt (Kernel_Elmt);
+         end loop;
+
+         --  Finish the CUDA initialization function: add a call to
+         --  register_fat_binary_end, to let the CUDA runtime know that we
+         --  won't be registering any other symbol with the current fat binary.
+
+         Append_To (Stmts,
+           Make_Function_Call (Loc,
+             Name                   =>
+               New_Occurrence_Of (RTE (RE_Register_Fat_Binary_End), Loc),
+             Parameter_Associations =>
+               New_List (New_Occurrence_Of (Handle_Id, Loc))));
+
+         --  Now that we have all the declarations and calls we need, we can
+         --  build and and return the initialization procedure.
+
+         return
+           Make_Subprogram_Body (Loc,
+             Specification              =>
+               Make_Procedure_Specification (Loc, Init_Id),
+             Declarations               => New_List,
+             Handled_Statement_Sequence =>
+               Make_Handled_Sequence_Of_Statements (Loc, Stmts));
+      end Build_CUDA_Init_Proc;
+
+      ----------------------------------
+      -- Build_Fat_Binary_Declaration --
+      ----------------------------------
+
+      function Build_Fat_Binary_Declaration
+        (Bin_Id : Entity_Id) return Node_Id
+      is
+      begin
+         return
+           Make_Object_Declaration (Sloc (Bin_Id),
+             Defining_Identifier => Bin_Id,
+             Object_Definition   =>
+               New_Occurrence_Of (RTE (RE_Address), Sloc (Bin_Id)));
+      end Build_Fat_Binary_Declaration;
+
+      -----------------------------------------
+      -- Build_Fat_Binary_Handle_Declaration --
+      -----------------------------------------
+
+      function Build_Fat_Binary_Handle_Declaration
+        (Handle_Id  : Entity_Id;
+         Wrapper_Id : Entity_Id) return Node_Id
+      is
+         Loc : constant Source_Ptr := Sloc (Handle_Id);
+      begin
+         --  Generate:
+         --    Handle_Id : System.Address :=
+         --      CUDA.Register_Fat_Binary (Wrapper_Id'Address);
+
+         return
+           Make_Object_Declaration (Loc,
+             Defining_Identifier => Handle_Id,
+             Object_Definition   => New_Occurrence_Of (RTE (RE_Address), Loc),
+             Expression          =>
+               Make_Function_Call (Loc,
+                 Name                   =>
+                   New_Occurrence_Of (RTE (RE_Register_Fat_Binary), Loc),
+                 Parameter_Associations => New_List (
+                   Make_Attribute_Reference (Loc,
+                     Prefix         =>
+                       New_Occurrence_Of (Wrapper_Id, Loc),
+                     Attribute_Name => Name_Address))));
+      end Build_Fat_Binary_Handle_Declaration;
+
+      ------------------------------------------
+      -- Build_Fat_Binary_Wrapper_Declaration --
+      ------------------------------------------
+
+      function Build_Fat_Binary_Wrapper_Declaration
+        (Wrapper_Id : Entity_Id;
+         Bin_Id     : Entity_Id) return Node_Id
+      is
+         Loc : constant Source_Ptr := Sloc (Wrapper_Id);
+      begin
+         return
+           Make_Object_Declaration (Loc,
+             Defining_Identifier => Wrapper_Id,
+             Object_Definition   =>
+               New_Occurrence_Of (RTE (RE_Fatbin_Wrapper), Loc),
+             Expression          =>
+               Make_Aggregate (Loc,
+                 Expressions => New_List (
+                   Make_Integer_Literal (Loc, UI_From_Int (16#466243b1#)),
+                   Make_Integer_Literal (Loc, UI_From_Int (1)),
+                   Make_Attribute_Reference (Loc,
+                     Prefix         => New_Occurrence_Of (Bin_Id, Loc),
+                     Attribute_Name => Name_Address),
+                   New_Occurrence_Of (RTE (RE_Null_Address), Loc))));
+      end Build_Fat_Binary_Wrapper_Declaration;
+
+      -------------------------
+      -- Build_Import_Pragma --
+      -------------------------
+
+      function Build_Import_Pragma
+        (Bin_Id    : Entity_Id;
+         Pack_Body : Node_Id) return Node_Id
+      is
+         Loc             : constant Source_Ptr := Sloc (Bin_Id);
+         External_Symbol : String_Id;
+      begin
+         Start_String;
+         Store_String_Chars
+           ("_binary_"
+            & Get_Name_String (Chars (Defining_Unit_Name (Pack_Body)))
+            & "_fatbin_start");
+         External_Symbol := End_String;
+
+         return
+           Make_Pragma (Loc,
+             Pragma_Identifier            =>
+               Make_Identifier (Loc, Name_Import),
+             Pragma_Argument_Associations => New_List (
+               Make_Pragma_Argument_Association (Loc,
+                 Chars      => Name_Convention,
+                 Expression => Make_Identifier (Loc, Name_C)),
+               Make_Pragma_Argument_Association (Loc,
+                 Chars      => Name_Entity,
+                 Expression => New_Occurrence_Of (Bin_Id, Loc)),
+               Make_Pragma_Argument_Association (Loc,
+                 Chars      => Name_External_Name,
+                 Expression => Make_String_Literal (Loc, External_Symbol))));
+      end Build_Import_Pragma;
+
+      -------------------------------------
+      -- Build_Kernel_Name_Declaration --
+      -------------------------------------
+
+      function Build_Kernel_Name_Declaration
+        (Kernel : Entity_Id) return Node_Id
+      is
+         Loc : constant Source_Ptr := Sloc (Kernel);
+
+         Package_Name : constant String :=
+           Get_Name_String (Chars (Scope (Kernel)));
+
+         Symbol_Name : constant String := Get_Name_String (Chars (Kernel));
+
+         Kernel_Name : String_Id;
+      begin
+         Start_String;
+         Store_String_Chars (Package_Name & "__" & Symbol_Name);
+         Kernel_Name := End_String;
+
+         return
+           Make_Object_Declaration (Loc,
+             Defining_Identifier => Make_Temporary (Loc, 'C'),
+             Object_Definition   =>
+               New_Occurrence_Of (RTE (RE_Chars_Ptr), Loc),
+             Expression          =>
+               Make_Function_Call (Loc,
+                 Name                   =>
+                   New_Occurrence_Of (RTE (RE_New_Char_Array), Loc),
+                 Parameter_Associations => New_List (
+                   Make_String_Literal (Loc, Kernel_Name))));
+      end Build_Kernel_Name_Declaration;
+
+      ----------------------------------
+      -- Build_Register_Function_Call --
+      ----------------------------------
+
+      function Build_Register_Function_Call
+        (Loc         : Source_Ptr;
+         Bin         : Entity_Id;
+         Kernel      : Entity_Id;
+         Kernel_Name : Entity_Id) return Node_Id
+      is
+         Args : constant List_Id := New_List;
+      begin
+         --  First argument: the handle of the fat binary.
+
+         Append (New_Occurrence_Of (Bin, Loc), Args);
+
+         --  Second argument: the host address of the function that is
+         --  marked with CUDA_Global.
+
+         Append_To (Args,
+           Make_Attribute_Reference (Loc,
+             Prefix         => New_Occurrence_Of (Kernel, Loc),
+             Attribute_Name => Name_Address));
+
+         --  Third argument, the name of the function on the host.
+
+         Append (New_Occurrence_Of (Kernel_Name, Loc), Args);
+
+         --  Fourth argument, the name of the function on the device.
+
+         Append (New_Occurrence_Of (Kernel_Name, Loc), Args);
+
+         --  Fith argument: -1. Meaning unknown - this has been copied from
+         --  LLVM.
+
+         Append (Make_Integer_Literal (Loc, UI_From_Int (-1)), Args);
+
+         --  Args 6, 7, 8, 9, 10: Null pointers. Again, meaning unknown.
+
+         for Arg_Count in 1 .. 5 loop
+            Append_To (Args, New_Occurrence_Of (RTE (RE_Null_Address), Loc));
+         end loop;
+
+         --  Build the call to CUDARegisterFunction, passing the argument
+         --  list we just built.
+
+         return
+           Make_Function_Call (Loc,
+             Name                   =>
+               New_Occurrence_Of (RTE (RE_Register_Function), Loc),
+             Parameter_Associations => Args);
+      end Build_Register_Function_Call;
+
+      --  Local declarations
+
+      Loc : constant Source_Ptr := Sloc (N);
+
+      Spec_Id : constant Node_Id := Corresponding_Spec (N);
+      --  The specification of the package we're adding a cuda init func to.
+
+      Pack_Decls : constant List_Id := Declarations (N);
+
+      CUDA_Node_List : constant Elist_Id := Get_CUDA_Kernels (Spec_Id);
+      --  CUDA nodes that belong to the package.
+
+      CUDA_Init_Func : Entity_Id;
+      --  Entity of the cuda init func.
+
+      Fat_Binary : Entity_Id;
+      --  Entity of the fat binary of N. Bound to said fat binary by a pragma.
+
+      Fat_Binary_Handle : Entity_Id;
+      --  Entity of the result of passing the fat binary wrapper to.
+      --  CUDA.Register_Fat_Binary.
+
+      Fat_Binary_Wrapper : Entity_Id;
+      --  Entity of a record that holds a bunch of magic numbers and a
+      --  reference to Fat_Binary.
+
+      New_Stmt : Node_Id;
+      --  Node to store newly-created declarations
+
+   --  Start of processing for Build_And_Insert_CUDA_Initialization
+
+   begin
+      if CUDA_Node_List = No_Elist or not Debug_Flag_Underscore_C then
+         return;
+      end if;
+
+      Fat_Binary := Make_Temporary (Loc, 'C');
+      New_Stmt := Build_Fat_Binary_Declaration (Fat_Binary);
+      Append_To (Pack_Decls, New_Stmt);
+      Analyze (New_Stmt);
+
+      New_Stmt := Build_Import_Pragma (Fat_Binary, N);
+      Append_To (Pack_Decls, New_Stmt);
+      Analyze (New_Stmt);
+
+      Fat_Binary_Wrapper := Make_Temporary (Loc, 'C');
+      New_Stmt :=
+        Build_Fat_Binary_Wrapper_Declaration
+          (Wrapper_Id => Fat_Binary_Wrapper,
+           Bin_Id     => Fat_Binary);
+      Append_To (Pack_Decls, New_Stmt);
+      Analyze (New_Stmt);
+
+      Fat_Binary_Handle := Make_Temporary (Loc, 'C');
+      New_Stmt :=
+        Build_Fat_Binary_Handle_Declaration
+          (Fat_Binary_Handle, Fat_Binary_Wrapper);
+      Append_To (Pack_Decls, New_Stmt);
+      Analyze (New_Stmt);
+
+      CUDA_Init_Func := Make_Temporary (Loc, 'C');
+      New_Stmt :=
+        Build_CUDA_Init_Proc
+          (Init_Id      => CUDA_Init_Func,
+           CUDA_Kernels => CUDA_Node_List,
+           Handle_Id    => Fat_Binary_Handle,
+           Pack_Decls   => Pack_Decls);
+      Append_To (Pack_Decls, New_Stmt);
+      Analyze (New_Stmt);
+
+      New_Stmt :=
+        Make_Procedure_Call_Statement (Loc,
+          Name => New_Occurrence_Of (CUDA_Init_Func, Loc));
+      Append_To (Pack_Decls, New_Stmt);
+      Analyze (New_Stmt);
+   end Build_And_Insert_CUDA_Initialization;
+
+   --------------------
+   -- Set_CUDA_Nodes --
+   --------------------
+
+   procedure Set_CUDA_Kernels
+     (Pack_Id : Entity_Id;
+      Kernels : Elist_Id)
+   is
+   begin
+      pragma Assert (Get_CUDA_Kernels (Pack_Id) = No_Elist);
+      CUDA_Kernels_Table.Set (Pack_Id, Kernels);
+   end Set_CUDA_Kernels;
+
+end GNAT_CUDA;
diff --git a/gcc/ada/gnat_cuda.ads b/gcc/ada/gnat_cuda.ads
new file mode 100644
index 0000000..e27be34
--- /dev/null
+++ b/gcc/ada/gnat_cuda.ads
@@ -0,0 +1,107 @@
+------------------------------------------------------------------------------
+--                                                                          --
+--                         GNAT COMPILER COMPONENTS                         --
+--                                                                          --
+--                                 C U D A                                  --
+--                                                                          --
+--                                 S p e c                                  --
+--                                                                          --
+--          Copyright (C) 2010-2020, Free Software Foundation, Inc.         --
+--                                                                          --
+-- GNAT is free software;  you can  redistribute it  and/or modify it under --
+-- terms of the  GNU General Public License as published  by the Free Soft- --
+-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
+-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
+-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
+-- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
+-- for  more details.  You should have  received  a copy of the GNU General --
+-- Public License  distributed with GNAT; see file COPYING3.  If not, go to --
+-- http://www.gnu.org/licenses for a complete copy of the license.          --
+--                                                                          --
+-- GNAT was originally developed  by the GNAT team at  New York University. --
+-- Extensive contributions were provided by Ada Core Technologies Inc.      --
+--                                                                          --
+------------------------------------------------------------------------------
+
+--  This package defines CUDA-specific datastructures and subprograms.
+--
+--  Compiling for CUDA requires compiling for two targets. One is the CPU (more
+--  frequently named "host"), the other is the GPU (the "device"). Compiling
+--  for the host requires compiling the whole program. Compiling for the device
+--  only requires compiling packages that contain CUDA kernels.
+--
+--  When compiling for the device, GNAT-LLVM is used. It produces assembly
+--  tailored to Nvidia's GPU (NVPTX). This NVPTX code is then assembled into
+--  an object file by ptxas, an assembler provided by Nvidia. This object file
+--  is then combined with its source code into a fat binary by a tool named
+--  `fatbin`, also provided by Nvidia. The resulting fat binary is turned into
+--  a regular object file by the host's linker and linked with the program that
+--  executes on the host.
+--
+--  A CUDA kernel is a procedure marked with the CUDA_Global pragma or aspect.
+--  CUDA_Global does not have any effect when compiling for the device. When
+--  compiling for the host, the frontend stores procedures marked with
+--  CUDA_Global in a hash table the key of which is the Node_Id of the package
+--  body that contains the CUDA_Global procedure. This is done in sem_prag.adb.
+--  Once the declarations of a package body have been analyzed, variable, type
+--  and procedure declarations necessary for the initialization of the CUDA
+--  runtime are appended to the package that contains the CUDA_Global
+--  procedure.
+--
+--  These declarations are used to register the CUDA kernel with the CUDA
+--  runtime when the program is launched. Registering a CUDA kernel with the
+--  CUDA runtime requires multiple function calls:
+--  - The first one registers the fat binary which corresponds to the package
+--    with the CUDA runtime.
+--  - Then, as many function calls as there are kernels in order to bind them
+--    with the fat binary.
+--    fat binary.
+--  - The last call lets the CUDA runtime know that we are done initializing
+--    CUDA.
+--  Expansion of the CUDA_Global aspect is triggered in sem_ch7.adb, during
+--  analysis of the package. All of this expansion is performed in the
+--  Insert_CUDA_Initialization procedure defined in GNAT_CUDA.
+--
+--  Once a CUDA package is initialized, its kernels are ready to be used.
+--  Launching CUDA kernels is done by using the CUDA_Execute pragma. When
+--  compiling for the host, the CUDA_Execute pragma is expanded into a declare
+--  block which performs calls to the CUDA runtime functions.
+--  - The first one pushes a "launch configuration" on the "configuration
+--    stack" of the CUDA runtime.
+--  - The second call pops this call configuration, making it effective.
+--  - The third call actually launches the kernel.
+--  Light validation of the CUDA_Execute pragma is performed in sem_prag.adb
+--  and expansion is performed in exp_prag.adb.
+
+with Types; use Types;
+
+package GNAT_CUDA is
+
+   procedure Add_CUDA_Kernel (Pack_Id : Entity_Id; Kernel : Entity_Id);
+   --  Add Kernel to the list of CUDA_Global nodes that belong to Pack_Id.
+   --  Kernel is a procedure entity marked with CUDA_Global, Pack_Id is the
+   --  entity of its parent package body.
+
+   procedure Build_And_Insert_CUDA_Initialization (N : Node_Id);
+   --  Builds declarations necessary for CUDA initialization and inserts them
+   --  in N, the package body that contains CUDA_Global nodes. These
+   --  declarations are:
+   --
+   --    * A symbol to hold the pointer to the CUDA fat binary
+   --
+   --    * A type definition for a wrapper that contains the pointer to the
+   --      CUDA fat binary
+   --
+   --    * An object of the aforementioned type to hold the aforementioned
+   --      pointer.
+   --
+   --    * For each CUDA_Global procedure in the package, a declaration of a C
+   --      string containing the function's name.
+   --
+   --    * A function that takes care of calling CUDA functions that register
+   --      CUDA_Global procedures with the runtime.
+   --
+   --    * A boolean that holds the result of the call to the aforementioned
+   --      function.
+
+end GNAT_CUDA;
diff --git a/gcc/ada/rtsfind.adb b/gcc/ada/rtsfind.adb
index 83220ef..872ce01 100644
--- a/gcc/ada/rtsfind.adb
+++ b/gcc/ada/rtsfind.adb
@@ -589,7 +589,10 @@ package body Rtsfind is
      range CUDA_Driver_Types .. CUDA_Vector_Types;
 
    subtype Interfaces_Descendant is RTU_Id
-     range Interfaces_C .. Interfaces_Packed_Decimal;
+     range Interfaces_C .. Interfaces_C_Strings;
+
+   subtype Interfaces_C_Descendant is Interfaces_Descendant
+     range Interfaces_C_Strings .. Interfaces_C_Strings;
 
    subtype System_Descendant is RTU_Id
      range System_Address_Image .. System_Tasking_Stages;
@@ -674,6 +677,10 @@ package body Rtsfind is
       elsif U_Id in Interfaces_Descendant then
          Name_Buffer (11) := '.';
 
+         if U_Id in Interfaces_C_Descendant then
+            Name_Buffer (13) := '.';
+         end if;
+
       elsif U_Id in System_Descendant then
          Name_Buffer (7) := '.';
 
diff --git a/gcc/ada/rtsfind.ads b/gcc/ada/rtsfind.ads
index ed6b671..1c8a294 100644
--- a/gcc/ada/rtsfind.ads
+++ b/gcc/ada/rtsfind.ads
@@ -179,6 +179,10 @@ package Rtsfind is
       Interfaces_C,
       Interfaces_Packed_Decimal,
 
+      --  Children of Interfaces.C
+
+      Interfaces_C_Strings,
+
       --  Package System
 
       System,
@@ -628,8 +632,12 @@ package Rtsfind is
 
      RE_Stream_T,                        -- CUDA.Driver_Types
 
+     RE_Fatbin_Wrapper,                  -- CUDA.Internal
      RE_Push_Call_Configuration,         -- CUDA.Internal
      RE_Pop_Call_Configuration,          -- CUDA.Internal
+     RE_Register_Fat_Binary,             -- CUDA.Internal
+     RE_Register_Fat_Binary_End,         -- CUDA.Internal
+     RE_Register_Function,               -- CUDA.Internal
 
      RE_Launch_Kernel,                   -- CUDA.Runtime_Api
 
@@ -647,6 +655,9 @@ package Rtsfind is
      RO_IC_Unsigned,                     -- Interfaces.C
      RO_IC_Unsigned_Long_Long,           -- Interfaces.C
 
+     RE_Chars_Ptr,                       -- Interfaces.C.Strings
+     RE_New_Char_Array,                  -- Interfaces.C.Strings
+
      RE_Address,                         -- System
      RE_Any_Priority,                    -- System
      RE_Bit_Order,                       -- System
@@ -1927,8 +1938,12 @@ package Rtsfind is
 
      RE_Stream_T                         => CUDA_Driver_Types,
 
+     RE_Fatbin_Wrapper                   => CUDA_Internal,
      RE_Push_Call_Configuration          => CUDA_Internal,
      RE_Pop_Call_Configuration           => CUDA_Internal,
+     RE_Register_Fat_Binary              => CUDA_Internal,
+     RE_Register_Fat_Binary_End          => CUDA_Internal,
+     RE_Register_Function                => CUDA_Internal,
 
      RE_Launch_Kernel                    => CUDA_Runtime_Api,
 
@@ -1946,6 +1961,9 @@ package Rtsfind is
      RO_IC_Unsigned                      => Interfaces_C,
      RO_IC_Unsigned_Long_Long            => Interfaces_C,
 
+     RE_Chars_Ptr                        => Interfaces_C_Strings,
+     RE_New_Char_Array                   => Interfaces_C_Strings,
+
      RE_Address                          => System,
      RE_Any_Priority                     => System,
      RE_Bit_Order                        => System,
diff --git a/gcc/ada/sem_ch7.adb b/gcc/ada/sem_ch7.adb
index 04ff071..762f0c1 100644
--- a/gcc/ada/sem_ch7.adb
+++ b/gcc/ada/sem_ch7.adb
@@ -40,6 +40,7 @@ with Exp_Dist;  use Exp_Dist;
 with Exp_Dbug;  use Exp_Dbug;
 with Freeze;    use Freeze;
 with Ghost;     use Ghost;
+with GNAT_CUDA; use GNAT_CUDA;
 with Lib;       use Lib;
 with Lib.Xref;  use Lib.Xref;
 with Namet;     use Namet;
@@ -999,6 +1000,13 @@ package body Sem_Ch7 is
          Analyze_List (Declarations (N));
       end if;
 
+      --  If procedures marked with CUDA_Global have been defined within N, we
+      --  need to register them with the CUDA runtime at program startup. This
+      --  requires multiple declarations and function calls which need to be
+      --  appended to N's declarations.
+
+      Build_And_Insert_CUDA_Initialization (N);
+
       HSS := Handled_Statement_Sequence (N);
 
       if Present (HSS) then
diff --git a/gcc/ada/sem_prag.adb b/gcc/ada/sem_prag.adb
index d9d957b..33a3f7a 100644
--- a/gcc/ada/sem_prag.adb
+++ b/gcc/ada/sem_prag.adb
@@ -44,6 +44,7 @@ with Exp_Util;  use Exp_Util;
 with Expander;  use Expander;
 with Freeze;    use Freeze;
 with Ghost;     use Ghost;
+with GNAT_CUDA; use GNAT_CUDA;
 with Gnatvsn;   use Gnatvsn;
 with Lib;       use Lib;
 with Lib.Writ;  use Lib.Writ;
@@ -14892,6 +14893,7 @@ package body Sem_Prag is
 
             else
                Set_Is_CUDA_Kernel (Kernel_Proc);
+               Add_CUDA_Kernel (Pack_Id, Kernel_Proc);
             end if;
          end CUDA_Global;
 
-- 
2.7.4