From b0a16e6d4c91120dd9a2900da0831e83e65f2046 Mon Sep 17 00:00:00 2001 From: Ghjuvan Lacambre Date: Thu, 30 Jan 2020 11:47:00 +0100 Subject: [PATCH] [Ada] Implement initialization of CUDA runtime gcc/ada/ * debug.adb: Document -gnatd_c flag as being used for CUDA. * gnat_cuda.ads: New file. * gnat_cuda.adb: New file. * rtsfind.ads: Add Interfaces_C_Strings package and RE_Fatbin_Wrapper, RE_Register_Fat_Binary, RE_Register_Fat_Binary_End, RE_Register_Function, RE_Chars_Ptr, RE_New_Char_Array entities. * rtsfind.adb: Create new Interfaces_C_Descendant subtype, handle it. * sem_ch7.adb (Analyze_Package_Body_Helper): Call CUDA init procedure. * sem_prag.adb (Analyze_Pragma): Call Add_Cuda_Kernel procedure. * gcc-interface/Make-lang.in (GNAT_ADA_OBJS): Add gnat_cuda.o. --- gcc/ada/debug.adb | 2 +- gcc/ada/gcc-interface/Make-lang.in | 1 + gcc/ada/gnat_cuda.adb | 586 +++++++++++++++++++++++++++++++++++++ gcc/ada/gnat_cuda.ads | 107 +++++++ gcc/ada/rtsfind.adb | 9 +- gcc/ada/rtsfind.ads | 18 ++ gcc/ada/sem_ch7.adb | 8 + gcc/ada/sem_prag.adb | 2 + 8 files changed, 731 insertions(+), 2 deletions(-) create mode 100644 gcc/ada/gnat_cuda.adb create mode 100644 gcc/ada/gnat_cuda.ads diff --git a/gcc/ada/debug.adb b/gcc/ada/debug.adb index bbdaf3b..4eb3d5b 100644 --- a/gcc/ada/debug.adb +++ b/gcc/ada/debug.adb @@ -147,7 +147,7 @@ package body Debug is -- d_a Stop elaboration checks on accept or select statement -- d_b - -- d_c + -- d_c CUDA compilation : compile for the host -- d_d -- d_e Ignore entry calls and requeue statements for elaboration -- d_f Issue info messages related to GNATprove usage diff --git a/gcc/ada/gcc-interface/Make-lang.in b/gcc/ada/gcc-interface/Make-lang.in index d950217..78fe602 100644 --- a/gcc/ada/gcc-interface/Make-lang.in +++ b/gcc/ada/gcc-interface/Make-lang.in @@ -327,6 +327,7 @@ GNAT_ADA_OBJS = \ ada/libgnat/g-u3spch.o \ ada/get_targ.o \ ada/ghost.o \ + ada/gnat_cuda.o \ ada/libgnat/gnat.o \ ada/gnatvsn.o \ ada/hostparm.o \ diff --git a/gcc/ada/gnat_cuda.adb b/gcc/ada/gnat_cuda.adb new file mode 100644 index 0000000..fef0d18 --- /dev/null +++ b/gcc/ada/gnat_cuda.adb @@ -0,0 +1,586 @@ +------------------------------------------------------------------------------ +-- -- +-- GNAT COMPILER COMPONENTS -- +-- -- +-- C U D A -- +-- -- +-- B o d y -- +-- -- +-- Copyright (C) 2010-2020, Free Software Foundation, Inc. -- +-- -- +-- GNAT is free software; you can redistribute it and/or modify it under -- +-- terms of the GNU General Public License as published by the Free Soft- -- +-- ware Foundation; either version 3, or (at your option) any later ver- -- +-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- +-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- +-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- +-- for more details. You should have received a copy of the GNU General -- +-- Public License distributed with GNAT; see file COPYING3. If not, go to -- +-- http://www.gnu.org/licenses for a complete copy of the license. -- +-- -- +-- GNAT was originally developed by the GNAT team at New York University. -- +-- Extensive contributions were provided by Ada Core Technologies Inc. -- +-- -- +------------------------------------------------------------------------------ + +-- This package defines CUDA-specific datastructures and functions. + +with Atree; use Atree; +with Debug; use Debug; +with Elists; use Elists; +with Namet; use Namet; +with Nlists; use Nlists; +with Nmake; use Nmake; +with Rtsfind; use Rtsfind; +with Sinfo; use Sinfo; +with Stringt; use Stringt; +with Tbuild; use Tbuild; +with Uintp; use Uintp; +with Sem; use Sem; +with Sem_Util; use Sem_Util; +with Snames; use Snames; + +with GNAT.HTable; + +package body GNAT_CUDA is + + -------------------------------------- + -- Hash Table for CUDA_Global nodes -- + -------------------------------------- + + type Hash_Range is range 0 .. 510; + -- Size of hash table headers + + function Hash (F : Entity_Id) return Hash_Range; + -- Hash function for hash table + + package CUDA_Kernels_Table is new + GNAT.HTable.Simple_HTable + (Header_Num => Hash_Range, + Element => Elist_Id, + No_Element => No_Elist, + Key => Entity_Id, + Hash => Hash, + Equal => "="); + -- The keys of this table are package entities whose bodies contain at + -- least one procedure marked with aspect CUDA_Global. The values are + -- Elists of the marked procedures. + + function Get_CUDA_Kernels (Pack_Id : Entity_Id) return Elist_Id; + -- Returns an Elist of all procedures marked with pragma CUDA_Global that + -- are declared within package body Pack_Body. Returns No_Elist if + -- Pack_Id does not contain such procedures. + + procedure Set_CUDA_Kernels + (Pack_Id : Entity_Id; + Kernels : Elist_Id); + -- Stores Kernels as the list of kernels belonging to the package entity + -- Pack_Id. Pack_Id must not have a list of kernels. + + --------------------- + -- Add_CUDA_Kernel -- + --------------------- + + procedure Add_CUDA_Kernel + (Pack_Id : Entity_Id; + Kernel : Entity_Id) + is + Kernels : Elist_Id := Get_CUDA_Kernels (Pack_Id); + begin + if Kernels = No_Elist then + Kernels := New_Elmt_List; + Set_CUDA_Kernels (Pack_Id, Kernels); + end if; + Append_Elmt (Kernel, Kernels); + end Add_CUDA_Kernel; + + ---------- + -- Hash -- + ---------- + + function Hash (F : Entity_Id) return Hash_Range is + begin + return Hash_Range (F mod 511); + end Hash; + + ---------------------- + -- Get_CUDA_Kernels -- + ---------------------- + + function Get_CUDA_Kernels (Pack_Id : Entity_Id) return Elist_Id is + begin + return CUDA_Kernels_Table.Get (Pack_Id); + end Get_CUDA_Kernels; + + ------------------------------------------ + -- Build_And_Insert_CUDA_Initialization -- + ------------------------------------------ + + procedure Build_And_Insert_CUDA_Initialization (N : Node_Id) is + + -- For the following kernel declaration: + -- + -- package body is + -- procedure (X : Integer) with CUDA_Global; + -- end package; + -- + -- Insert the following declarations: + -- + -- Fat_Binary : System.Address; + -- pragma Import + -- (Convention => C, + -- Entity => Fat_Binary, + -- External_Name => "_binary__fatbin_start"); + -- + -- Wrapper : Fatbin_Wrapper := + -- (16#466243b1#, 1, Fat_Binary'Address, System.Null_Address); + -- + -- Proc_Symbol_Name : Interfaces.C.Strings.Chars_Ptr := + -- Interfaces.C.Strings.New_Char_Array(""); + -- + -- Fat_Binary_Handle : System.Address := + -- CUDA.Internal.Register_Fat_Binary (Wrapper'Address); + -- + -- procedure Initialize_CUDA_Kernel is + -- begin + -- CUDA.Internal.Register_Function + -- (Fat_Binary_Handle, + -- 'Address, + -- Proc_Symbol_Name, + -- Proc_Symbol_Name, + -- -1, + -- System.Null_Address, + -- System.Null_Address, + -- System.Null_Address, + -- System.Null_Address, + -- System.Null_Address); + -- CUDA.Internal.Register_Fat_Binary_End (Fat_Binary_Handle); + -- end Initialize_CUDA_Kernel; + -- + -- Proc_Symbol_Name is the name of the procedure marked with + -- CUDA_Global. The CUDA runtime uses this in order to be able to find + -- kernels in the fat binary, so it has to match the name of the + -- procedure symbol compiled by GNAT_LLVM. When looking at the code + -- generated by NVCC, it seems that the CUDA runtime also needs the name + -- of the procedure symbol of the host. Fortuantely, the procedures are + -- named the same way whether they are compiled for the host or the + -- device, so we use Vector_Add_Name to specify the name of the symbol + -- for both the host and the device. The meaning of the rest of the + -- arguments is unknown. + + function Build_CUDA_Init_Proc + (Init_Id : Entity_Id; + CUDA_Kernels : Elist_Id; + Handle_Id : Entity_Id; + Pack_Decls : List_Id) return Node_Id; + -- Create the declaration of Init_Id, the function that binds each + -- kernel present in CUDA_Kernels with the fat binary Handle_Id and then + -- tells the CUDA runtime that no new function will be bound to the fat + -- binary. + + function Build_Fat_Binary_Declaration + (Bin_Id : Entity_Id) return Node_Id; + -- Create a declaration for Bin_Id, the entity that represents the fat + -- binary, i.e.: + -- + -- Bin_Id : System.Address; + + function Build_Fat_Binary_Handle_Declaration + (Handle_Id : Entity_Id; + Wrapper_Id : Entity_Id) return Node_Id; + -- Create the declaration of Handle_Id, a System.Address that will + -- receive the results of passing the address of Wrapper_Id to + -- CUDA.Register_Fat_Binary, i.e.: + -- + -- Handle_Id : System.Address := + -- CUDA.Register_Fat_Binary (Wrapper_Id'Address) + + function Build_Fat_Binary_Wrapper_Declaration + (Wrapper_Id : Entity_Id; + Bin_Id : Entity_Id) return Node_Id; + -- Create the declaration of the fat binary wrapper Wrapper_Id, which + -- holds magic numbers and Bin_Id'Address, i.e.: + -- + -- Wrapper_Id : System.Address := + -- (16#466243b1#, 1, Bin_Id'Address, System.Null_Address); + + function Build_Import_Pragma + (Bin_Id : Entity_Id; + Pack_Body : Node_Id) return Node_Id; + -- Create a pragma that will bind the fat binary Bin_Id to its external + -- symbol. N is the package body Bin_Id belongs to, i.e.: + -- + -- pragma Import + -- (Convention => C, + -- Entity => Bin_Id, + -- External_Name => "_binary__fatbin_start"); + + function Build_Kernel_Name_Declaration + (Kernel : Entity_Id) return Node_Id; + -- Create the declaration of a C string that contains the name of + -- Kernel's symbol, i.e.: + -- + -- Kernel : Interfaces.C.Strings.Chars_Ptr := + -- Interfaces.C.Strings.New_Char_Array(""); + + function Build_Register_Function_Call + (Loc : Source_Ptr; + Bin : Entity_Id; + Kernel : Entity_Id; + Kernel_Name : Entity_Id) return Node_Id; + -- Return a call to CUDA.Internal.Register_Function that binds Kernel + -- (the entity of a procedure) to the symbol described by the C string + -- Kernel_Name in the fat binary Bin, using Loc as location. + + -------------------------- + -- Build_CUDA_Init_Proc -- + -------------------------- + + function Build_CUDA_Init_Proc + (Init_Id : Entity_Id; + CUDA_Kernels : Elist_Id; + Handle_Id : Entity_Id; + Pack_Decls : List_Id) return Node_Id + is + Loc : constant Source_Ptr := Sloc (Init_Id); + + Stmts : constant List_Id := New_List; + -- List of statements that will be used by the cuda initialization + -- function. + + New_Stmt : Node_Id; + -- Temporary variable to hold the various newly-created nodes. + + Kernel_Elmt : Elmt_Id; + Kernel_Id : Entity_Id; + + begin + -- For each CUDA_Global function, declare a C string that holds + -- its symbol's name (i.e. packagename __ functionname). + + -- Also create a function call to CUDA.Internal.Register_Function + -- that takes the declared C string, a pointer to the function and + -- the fat binary handle. + + Kernel_Elmt := First_Elmt (CUDA_Kernels); + while Present (Kernel_Elmt) loop + Kernel_Id := Node (Kernel_Elmt); + + New_Stmt := + Build_Kernel_Name_Declaration (Kernel_Id); + Append (New_Stmt, Pack_Decls); + Analyze (New_Stmt); + + Append_To (Stmts, + Build_Register_Function_Call (Loc, + Bin => Handle_Id, + Kernel => Kernel_Id, + Kernel_Name => Defining_Entity (New_Stmt))); + + Next_Elmt (Kernel_Elmt); + end loop; + + -- Finish the CUDA initialization function: add a call to + -- register_fat_binary_end, to let the CUDA runtime know that we + -- won't be registering any other symbol with the current fat binary. + + Append_To (Stmts, + Make_Function_Call (Loc, + Name => + New_Occurrence_Of (RTE (RE_Register_Fat_Binary_End), Loc), + Parameter_Associations => + New_List (New_Occurrence_Of (Handle_Id, Loc)))); + + -- Now that we have all the declarations and calls we need, we can + -- build and and return the initialization procedure. + + return + Make_Subprogram_Body (Loc, + Specification => + Make_Procedure_Specification (Loc, Init_Id), + Declarations => New_List, + Handled_Statement_Sequence => + Make_Handled_Sequence_Of_Statements (Loc, Stmts)); + end Build_CUDA_Init_Proc; + + ---------------------------------- + -- Build_Fat_Binary_Declaration -- + ---------------------------------- + + function Build_Fat_Binary_Declaration + (Bin_Id : Entity_Id) return Node_Id + is + begin + return + Make_Object_Declaration (Sloc (Bin_Id), + Defining_Identifier => Bin_Id, + Object_Definition => + New_Occurrence_Of (RTE (RE_Address), Sloc (Bin_Id))); + end Build_Fat_Binary_Declaration; + + ----------------------------------------- + -- Build_Fat_Binary_Handle_Declaration -- + ----------------------------------------- + + function Build_Fat_Binary_Handle_Declaration + (Handle_Id : Entity_Id; + Wrapper_Id : Entity_Id) return Node_Id + is + Loc : constant Source_Ptr := Sloc (Handle_Id); + begin + -- Generate: + -- Handle_Id : System.Address := + -- CUDA.Register_Fat_Binary (Wrapper_Id'Address); + + return + Make_Object_Declaration (Loc, + Defining_Identifier => Handle_Id, + Object_Definition => New_Occurrence_Of (RTE (RE_Address), Loc), + Expression => + Make_Function_Call (Loc, + Name => + New_Occurrence_Of (RTE (RE_Register_Fat_Binary), Loc), + Parameter_Associations => New_List ( + Make_Attribute_Reference (Loc, + Prefix => + New_Occurrence_Of (Wrapper_Id, Loc), + Attribute_Name => Name_Address)))); + end Build_Fat_Binary_Handle_Declaration; + + ------------------------------------------ + -- Build_Fat_Binary_Wrapper_Declaration -- + ------------------------------------------ + + function Build_Fat_Binary_Wrapper_Declaration + (Wrapper_Id : Entity_Id; + Bin_Id : Entity_Id) return Node_Id + is + Loc : constant Source_Ptr := Sloc (Wrapper_Id); + begin + return + Make_Object_Declaration (Loc, + Defining_Identifier => Wrapper_Id, + Object_Definition => + New_Occurrence_Of (RTE (RE_Fatbin_Wrapper), Loc), + Expression => + Make_Aggregate (Loc, + Expressions => New_List ( + Make_Integer_Literal (Loc, UI_From_Int (16#466243b1#)), + Make_Integer_Literal (Loc, UI_From_Int (1)), + Make_Attribute_Reference (Loc, + Prefix => New_Occurrence_Of (Bin_Id, Loc), + Attribute_Name => Name_Address), + New_Occurrence_Of (RTE (RE_Null_Address), Loc)))); + end Build_Fat_Binary_Wrapper_Declaration; + + ------------------------- + -- Build_Import_Pragma -- + ------------------------- + + function Build_Import_Pragma + (Bin_Id : Entity_Id; + Pack_Body : Node_Id) return Node_Id + is + Loc : constant Source_Ptr := Sloc (Bin_Id); + External_Symbol : String_Id; + begin + Start_String; + Store_String_Chars + ("_binary_" + & Get_Name_String (Chars (Defining_Unit_Name (Pack_Body))) + & "_fatbin_start"); + External_Symbol := End_String; + + return + Make_Pragma (Loc, + Pragma_Identifier => + Make_Identifier (Loc, Name_Import), + Pragma_Argument_Associations => New_List ( + Make_Pragma_Argument_Association (Loc, + Chars => Name_Convention, + Expression => Make_Identifier (Loc, Name_C)), + Make_Pragma_Argument_Association (Loc, + Chars => Name_Entity, + Expression => New_Occurrence_Of (Bin_Id, Loc)), + Make_Pragma_Argument_Association (Loc, + Chars => Name_External_Name, + Expression => Make_String_Literal (Loc, External_Symbol)))); + end Build_Import_Pragma; + + ------------------------------------- + -- Build_Kernel_Name_Declaration -- + ------------------------------------- + + function Build_Kernel_Name_Declaration + (Kernel : Entity_Id) return Node_Id + is + Loc : constant Source_Ptr := Sloc (Kernel); + + Package_Name : constant String := + Get_Name_String (Chars (Scope (Kernel))); + + Symbol_Name : constant String := Get_Name_String (Chars (Kernel)); + + Kernel_Name : String_Id; + begin + Start_String; + Store_String_Chars (Package_Name & "__" & Symbol_Name); + Kernel_Name := End_String; + + return + Make_Object_Declaration (Loc, + Defining_Identifier => Make_Temporary (Loc, 'C'), + Object_Definition => + New_Occurrence_Of (RTE (RE_Chars_Ptr), Loc), + Expression => + Make_Function_Call (Loc, + Name => + New_Occurrence_Of (RTE (RE_New_Char_Array), Loc), + Parameter_Associations => New_List ( + Make_String_Literal (Loc, Kernel_Name)))); + end Build_Kernel_Name_Declaration; + + ---------------------------------- + -- Build_Register_Function_Call -- + ---------------------------------- + + function Build_Register_Function_Call + (Loc : Source_Ptr; + Bin : Entity_Id; + Kernel : Entity_Id; + Kernel_Name : Entity_Id) return Node_Id + is + Args : constant List_Id := New_List; + begin + -- First argument: the handle of the fat binary. + + Append (New_Occurrence_Of (Bin, Loc), Args); + + -- Second argument: the host address of the function that is + -- marked with CUDA_Global. + + Append_To (Args, + Make_Attribute_Reference (Loc, + Prefix => New_Occurrence_Of (Kernel, Loc), + Attribute_Name => Name_Address)); + + -- Third argument, the name of the function on the host. + + Append (New_Occurrence_Of (Kernel_Name, Loc), Args); + + -- Fourth argument, the name of the function on the device. + + Append (New_Occurrence_Of (Kernel_Name, Loc), Args); + + -- Fith argument: -1. Meaning unknown - this has been copied from + -- LLVM. + + Append (Make_Integer_Literal (Loc, UI_From_Int (-1)), Args); + + -- Args 6, 7, 8, 9, 10: Null pointers. Again, meaning unknown. + + for Arg_Count in 1 .. 5 loop + Append_To (Args, New_Occurrence_Of (RTE (RE_Null_Address), Loc)); + end loop; + + -- Build the call to CUDARegisterFunction, passing the argument + -- list we just built. + + return + Make_Function_Call (Loc, + Name => + New_Occurrence_Of (RTE (RE_Register_Function), Loc), + Parameter_Associations => Args); + end Build_Register_Function_Call; + + -- Local declarations + + Loc : constant Source_Ptr := Sloc (N); + + Spec_Id : constant Node_Id := Corresponding_Spec (N); + -- The specification of the package we're adding a cuda init func to. + + Pack_Decls : constant List_Id := Declarations (N); + + CUDA_Node_List : constant Elist_Id := Get_CUDA_Kernels (Spec_Id); + -- CUDA nodes that belong to the package. + + CUDA_Init_Func : Entity_Id; + -- Entity of the cuda init func. + + Fat_Binary : Entity_Id; + -- Entity of the fat binary of N. Bound to said fat binary by a pragma. + + Fat_Binary_Handle : Entity_Id; + -- Entity of the result of passing the fat binary wrapper to. + -- CUDA.Register_Fat_Binary. + + Fat_Binary_Wrapper : Entity_Id; + -- Entity of a record that holds a bunch of magic numbers and a + -- reference to Fat_Binary. + + New_Stmt : Node_Id; + -- Node to store newly-created declarations + + -- Start of processing for Build_And_Insert_CUDA_Initialization + + begin + if CUDA_Node_List = No_Elist or not Debug_Flag_Underscore_C then + return; + end if; + + Fat_Binary := Make_Temporary (Loc, 'C'); + New_Stmt := Build_Fat_Binary_Declaration (Fat_Binary); + Append_To (Pack_Decls, New_Stmt); + Analyze (New_Stmt); + + New_Stmt := Build_Import_Pragma (Fat_Binary, N); + Append_To (Pack_Decls, New_Stmt); + Analyze (New_Stmt); + + Fat_Binary_Wrapper := Make_Temporary (Loc, 'C'); + New_Stmt := + Build_Fat_Binary_Wrapper_Declaration + (Wrapper_Id => Fat_Binary_Wrapper, + Bin_Id => Fat_Binary); + Append_To (Pack_Decls, New_Stmt); + Analyze (New_Stmt); + + Fat_Binary_Handle := Make_Temporary (Loc, 'C'); + New_Stmt := + Build_Fat_Binary_Handle_Declaration + (Fat_Binary_Handle, Fat_Binary_Wrapper); + Append_To (Pack_Decls, New_Stmt); + Analyze (New_Stmt); + + CUDA_Init_Func := Make_Temporary (Loc, 'C'); + New_Stmt := + Build_CUDA_Init_Proc + (Init_Id => CUDA_Init_Func, + CUDA_Kernels => CUDA_Node_List, + Handle_Id => Fat_Binary_Handle, + Pack_Decls => Pack_Decls); + Append_To (Pack_Decls, New_Stmt); + Analyze (New_Stmt); + + New_Stmt := + Make_Procedure_Call_Statement (Loc, + Name => New_Occurrence_Of (CUDA_Init_Func, Loc)); + Append_To (Pack_Decls, New_Stmt); + Analyze (New_Stmt); + end Build_And_Insert_CUDA_Initialization; + + -------------------- + -- Set_CUDA_Nodes -- + -------------------- + + procedure Set_CUDA_Kernels + (Pack_Id : Entity_Id; + Kernels : Elist_Id) + is + begin + pragma Assert (Get_CUDA_Kernels (Pack_Id) = No_Elist); + CUDA_Kernels_Table.Set (Pack_Id, Kernels); + end Set_CUDA_Kernels; + +end GNAT_CUDA; diff --git a/gcc/ada/gnat_cuda.ads b/gcc/ada/gnat_cuda.ads new file mode 100644 index 0000000..e27be34 --- /dev/null +++ b/gcc/ada/gnat_cuda.ads @@ -0,0 +1,107 @@ +------------------------------------------------------------------------------ +-- -- +-- GNAT COMPILER COMPONENTS -- +-- -- +-- C U D A -- +-- -- +-- S p e c -- +-- -- +-- Copyright (C) 2010-2020, Free Software Foundation, Inc. -- +-- -- +-- GNAT is free software; you can redistribute it and/or modify it under -- +-- terms of the GNU General Public License as published by the Free Soft- -- +-- ware Foundation; either version 3, or (at your option) any later ver- -- +-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- +-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- +-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- +-- for more details. You should have received a copy of the GNU General -- +-- Public License distributed with GNAT; see file COPYING3. If not, go to -- +-- http://www.gnu.org/licenses for a complete copy of the license. -- +-- -- +-- GNAT was originally developed by the GNAT team at New York University. -- +-- Extensive contributions were provided by Ada Core Technologies Inc. -- +-- -- +------------------------------------------------------------------------------ + +-- This package defines CUDA-specific datastructures and subprograms. +-- +-- Compiling for CUDA requires compiling for two targets. One is the CPU (more +-- frequently named "host"), the other is the GPU (the "device"). Compiling +-- for the host requires compiling the whole program. Compiling for the device +-- only requires compiling packages that contain CUDA kernels. +-- +-- When compiling for the device, GNAT-LLVM is used. It produces assembly +-- tailored to Nvidia's GPU (NVPTX). This NVPTX code is then assembled into +-- an object file by ptxas, an assembler provided by Nvidia. This object file +-- is then combined with its source code into a fat binary by a tool named +-- `fatbin`, also provided by Nvidia. The resulting fat binary is turned into +-- a regular object file by the host's linker and linked with the program that +-- executes on the host. +-- +-- A CUDA kernel is a procedure marked with the CUDA_Global pragma or aspect. +-- CUDA_Global does not have any effect when compiling for the device. When +-- compiling for the host, the frontend stores procedures marked with +-- CUDA_Global in a hash table the key of which is the Node_Id of the package +-- body that contains the CUDA_Global procedure. This is done in sem_prag.adb. +-- Once the declarations of a package body have been analyzed, variable, type +-- and procedure declarations necessary for the initialization of the CUDA +-- runtime are appended to the package that contains the CUDA_Global +-- procedure. +-- +-- These declarations are used to register the CUDA kernel with the CUDA +-- runtime when the program is launched. Registering a CUDA kernel with the +-- CUDA runtime requires multiple function calls: +-- - The first one registers the fat binary which corresponds to the package +-- with the CUDA runtime. +-- - Then, as many function calls as there are kernels in order to bind them +-- with the fat binary. +-- fat binary. +-- - The last call lets the CUDA runtime know that we are done initializing +-- CUDA. +-- Expansion of the CUDA_Global aspect is triggered in sem_ch7.adb, during +-- analysis of the package. All of this expansion is performed in the +-- Insert_CUDA_Initialization procedure defined in GNAT_CUDA. +-- +-- Once a CUDA package is initialized, its kernels are ready to be used. +-- Launching CUDA kernels is done by using the CUDA_Execute pragma. When +-- compiling for the host, the CUDA_Execute pragma is expanded into a declare +-- block which performs calls to the CUDA runtime functions. +-- - The first one pushes a "launch configuration" on the "configuration +-- stack" of the CUDA runtime. +-- - The second call pops this call configuration, making it effective. +-- - The third call actually launches the kernel. +-- Light validation of the CUDA_Execute pragma is performed in sem_prag.adb +-- and expansion is performed in exp_prag.adb. + +with Types; use Types; + +package GNAT_CUDA is + + procedure Add_CUDA_Kernel (Pack_Id : Entity_Id; Kernel : Entity_Id); + -- Add Kernel to the list of CUDA_Global nodes that belong to Pack_Id. + -- Kernel is a procedure entity marked with CUDA_Global, Pack_Id is the + -- entity of its parent package body. + + procedure Build_And_Insert_CUDA_Initialization (N : Node_Id); + -- Builds declarations necessary for CUDA initialization and inserts them + -- in N, the package body that contains CUDA_Global nodes. These + -- declarations are: + -- + -- * A symbol to hold the pointer to the CUDA fat binary + -- + -- * A type definition for a wrapper that contains the pointer to the + -- CUDA fat binary + -- + -- * An object of the aforementioned type to hold the aforementioned + -- pointer. + -- + -- * For each CUDA_Global procedure in the package, a declaration of a C + -- string containing the function's name. + -- + -- * A function that takes care of calling CUDA functions that register + -- CUDA_Global procedures with the runtime. + -- + -- * A boolean that holds the result of the call to the aforementioned + -- function. + +end GNAT_CUDA; diff --git a/gcc/ada/rtsfind.adb b/gcc/ada/rtsfind.adb index 83220ef..872ce01 100644 --- a/gcc/ada/rtsfind.adb +++ b/gcc/ada/rtsfind.adb @@ -589,7 +589,10 @@ package body Rtsfind is range CUDA_Driver_Types .. CUDA_Vector_Types; subtype Interfaces_Descendant is RTU_Id - range Interfaces_C .. Interfaces_Packed_Decimal; + range Interfaces_C .. Interfaces_C_Strings; + + subtype Interfaces_C_Descendant is Interfaces_Descendant + range Interfaces_C_Strings .. Interfaces_C_Strings; subtype System_Descendant is RTU_Id range System_Address_Image .. System_Tasking_Stages; @@ -674,6 +677,10 @@ package body Rtsfind is elsif U_Id in Interfaces_Descendant then Name_Buffer (11) := '.'; + if U_Id in Interfaces_C_Descendant then + Name_Buffer (13) := '.'; + end if; + elsif U_Id in System_Descendant then Name_Buffer (7) := '.'; diff --git a/gcc/ada/rtsfind.ads b/gcc/ada/rtsfind.ads index ed6b671..1c8a294 100644 --- a/gcc/ada/rtsfind.ads +++ b/gcc/ada/rtsfind.ads @@ -179,6 +179,10 @@ package Rtsfind is Interfaces_C, Interfaces_Packed_Decimal, + -- Children of Interfaces.C + + Interfaces_C_Strings, + -- Package System System, @@ -628,8 +632,12 @@ package Rtsfind is RE_Stream_T, -- CUDA.Driver_Types + RE_Fatbin_Wrapper, -- CUDA.Internal RE_Push_Call_Configuration, -- CUDA.Internal RE_Pop_Call_Configuration, -- CUDA.Internal + RE_Register_Fat_Binary, -- CUDA.Internal + RE_Register_Fat_Binary_End, -- CUDA.Internal + RE_Register_Function, -- CUDA.Internal RE_Launch_Kernel, -- CUDA.Runtime_Api @@ -647,6 +655,9 @@ package Rtsfind is RO_IC_Unsigned, -- Interfaces.C RO_IC_Unsigned_Long_Long, -- Interfaces.C + RE_Chars_Ptr, -- Interfaces.C.Strings + RE_New_Char_Array, -- Interfaces.C.Strings + RE_Address, -- System RE_Any_Priority, -- System RE_Bit_Order, -- System @@ -1927,8 +1938,12 @@ package Rtsfind is RE_Stream_T => CUDA_Driver_Types, + RE_Fatbin_Wrapper => CUDA_Internal, RE_Push_Call_Configuration => CUDA_Internal, RE_Pop_Call_Configuration => CUDA_Internal, + RE_Register_Fat_Binary => CUDA_Internal, + RE_Register_Fat_Binary_End => CUDA_Internal, + RE_Register_Function => CUDA_Internal, RE_Launch_Kernel => CUDA_Runtime_Api, @@ -1946,6 +1961,9 @@ package Rtsfind is RO_IC_Unsigned => Interfaces_C, RO_IC_Unsigned_Long_Long => Interfaces_C, + RE_Chars_Ptr => Interfaces_C_Strings, + RE_New_Char_Array => Interfaces_C_Strings, + RE_Address => System, RE_Any_Priority => System, RE_Bit_Order => System, diff --git a/gcc/ada/sem_ch7.adb b/gcc/ada/sem_ch7.adb index 04ff071..762f0c1 100644 --- a/gcc/ada/sem_ch7.adb +++ b/gcc/ada/sem_ch7.adb @@ -40,6 +40,7 @@ with Exp_Dist; use Exp_Dist; with Exp_Dbug; use Exp_Dbug; with Freeze; use Freeze; with Ghost; use Ghost; +with GNAT_CUDA; use GNAT_CUDA; with Lib; use Lib; with Lib.Xref; use Lib.Xref; with Namet; use Namet; @@ -999,6 +1000,13 @@ package body Sem_Ch7 is Analyze_List (Declarations (N)); end if; + -- If procedures marked with CUDA_Global have been defined within N, we + -- need to register them with the CUDA runtime at program startup. This + -- requires multiple declarations and function calls which need to be + -- appended to N's declarations. + + Build_And_Insert_CUDA_Initialization (N); + HSS := Handled_Statement_Sequence (N); if Present (HSS) then diff --git a/gcc/ada/sem_prag.adb b/gcc/ada/sem_prag.adb index d9d957b..33a3f7a 100644 --- a/gcc/ada/sem_prag.adb +++ b/gcc/ada/sem_prag.adb @@ -44,6 +44,7 @@ with Exp_Util; use Exp_Util; with Expander; use Expander; with Freeze; use Freeze; with Ghost; use Ghost; +with GNAT_CUDA; use GNAT_CUDA; with Gnatvsn; use Gnatvsn; with Lib; use Lib; with Lib.Writ; use Lib.Writ; @@ -14892,6 +14893,7 @@ package body Sem_Prag is else Set_Is_CUDA_Kernel (Kernel_Proc); + Add_CUDA_Kernel (Pack_Id, Kernel_Proc); end if; end CUDA_Global; -- 2.7.4