From 31fef989934d38385567f9df8016e0d567a62bd0 Mon Sep 17 00:00:00 2001 From: Samuel Antao Date: Thu, 27 Oct 2016 17:39:44 +0000 Subject: [PATCH] [Driver][OpenMP] Add logic for offloading-specific argument translation. Summary: This patch includes support for argument translation that is specific to a given offloading kind. Additionally, it implements the translation for OpenMP device kinds in the gcc tool chain. With this patch, it is possible to compile a functional OpenMP application with offloading capabilities with no separate compilation. Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel Subscribers: whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D21848 llvm-svn: 285320 --- clang/include/clang/Driver/Compilation.h | 35 ++++++++++++++++---- clang/include/clang/Driver/ToolChain.h | 9 +++-- clang/lib/Driver/Compilation.cpp | 9 ++--- clang/lib/Driver/Driver.cpp | 9 +++-- clang/lib/Driver/MSVCToolChain.cpp | 2 +- clang/lib/Driver/ToolChains.cpp | 56 +++++++++++++++++++++++++++++--- clang/lib/Driver/ToolChains.h | 19 ++++++----- clang/test/Driver/openmp-offload.c | 12 +++---- 8 files changed, 114 insertions(+), 37 deletions(-) diff --git a/clang/include/clang/Driver/Compilation.h b/clang/include/clang/Driver/Compilation.h index fbc4308..114e0b3 100644 --- a/clang/include/clang/Driver/Compilation.h +++ b/clang/include/clang/Driver/Compilation.h @@ -67,11 +67,27 @@ class Compilation { /// The root list of jobs. JobList Jobs; - /// Cache of translated arguments for a particular tool chain and bound - /// architecture. - llvm::DenseMap<std::pair<const ToolChain *, StringRef>, - llvm::opt::DerivedArgList *> - TCArgs; + /// Cache of translated arguments for a particular tool chain, bound + /// architecture, and device offload kind. 
+ struct TCArgsKey final { + const ToolChain *TC = nullptr; + StringRef BoundArch; + Action::OffloadKind DeviceOffloadKind = Action::OFK_None; + bool operator<(const TCArgsKey &K) const { + if (TC < K.TC) + return true; + else if (TC == K.TC && BoundArch < K.BoundArch) + return true; + else if (TC == K.TC && BoundArch == K.BoundArch && + DeviceOffloadKind < K.DeviceOffloadKind) + return true; + return false; + } + TCArgsKey(const ToolChain *TC, StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) + : TC(TC), BoundArch(BoundArch), DeviceOffloadKind(DeviceOffloadKind) {} + }; + std::map<TCArgsKey, llvm::opt::DerivedArgList *> TCArgs; /// Temporary files which should be removed on exit. llvm::opt::ArgStringList TempFiles; @@ -182,10 +198,15 @@ public: /// getArgsForToolChain - Return the derived argument list for the /// tool chain \p TC (or the default tool chain, if TC is not specified). + /// If a device offloading kind is specified, a translation specific for that + /// kind is performed, if any. /// /// \param BoundArch - The bound architecture name, or 0. - const llvm::opt::DerivedArgList &getArgsForToolChain(const ToolChain *TC, - StringRef BoundArch); + /// \param DeviceOffloadKind - The offload device kind that should be used in + /// the translation, if any. + const llvm::opt::DerivedArgList & + getArgsForToolChain(const ToolChain *TC, StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind); /// addTempFile - Add a file to remove on exit, and returns its /// argument. diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index 073a0a2..0ce5915 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -190,12 +190,15 @@ public: /// TranslateArgs - Create a new derived argument list for any argument /// translations this ToolChain may wish to perform, or 0 if no tool chain - /// specific translations are needed. 
If \p DeviceOffloadKind is specified + /// the translation specific for that offload kind is performed. /// /// \param BoundArch - The bound architecture name, or 0. + /// \param DeviceOffloadKind - The device offload kind used for the + /// translation. virtual llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, - StringRef BoundArch) const { + TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) const { return nullptr; } diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp index 6bbaae6..5c13e59 100644 --- a/clang/lib/Driver/Compilation.cpp +++ b/clang/lib/Driver/Compilation.cpp @@ -50,14 +50,15 @@ Compilation::~Compilation() { } } -const DerivedArgList &Compilation::getArgsForToolChain(const ToolChain *TC, - StringRef BoundArch) { +const DerivedArgList & +Compilation::getArgsForToolChain(const ToolChain *TC, StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) { if (!TC) TC = &DefaultToolChain; - DerivedArgList *&Entry = TCArgs[std::make_pair(TC, BoundArch)]; + DerivedArgList *&Entry = TCArgs[{TC, BoundArch, DeviceOffloadKind}]; if (!Entry) { - Entry = TC->TranslateArgs(*TranslatedArgs, BoundArch); + Entry = TC->TranslateArgs(*TranslatedArgs, BoundArch, DeviceOffloadKind); if (!Entry) Entry = TranslatedArgs; } diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 58c25cf..fe96052 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3011,7 +3011,8 @@ InputInfo Driver::BuildJobsForActionNoCache( // Set the effective triple of the toolchain for the duration of this job. 
llvm::Triple EffectiveTriple; const ToolChain &ToolTC = T->getToolChain(); - const ArgList &Args = C.getArgsForToolChain(TC, BoundArch); + const ArgList &Args = + C.getArgsForToolChain(TC, BoundArch, A->getOffloadingDeviceKind()); if (InputInfos.size() != 1) { EffectiveTriple = llvm::Triple(ToolTC.ComputeEffectiveClangTriple(Args)); } else { @@ -3041,8 +3042,10 @@ InputInfo Driver::BuildJobsForActionNoCache( } llvm::errs() << "], output: " << Result.getAsString() << "\n"; } else { - T->ConstructJob(C, *JA, Result, InputInfos, - C.getArgsForToolChain(TC, BoundArch), LinkingOutput); + T->ConstructJob( + C, *JA, Result, InputInfos, + C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()), + LinkingOutput); } return Result; } diff --git a/clang/lib/Driver/MSVCToolChain.cpp b/clang/lib/Driver/MSVCToolChain.cpp index 3a501b5..c6902fa 100644 --- a/clang/lib/Driver/MSVCToolChain.cpp +++ b/clang/lib/Driver/MSVCToolChain.cpp @@ -811,7 +811,7 @@ static void TranslateDArg(Arg *A, llvm::opt::DerivedArgList &DAL, llvm::opt::DerivedArgList * MSVCToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, - StringRef BoundArch) const { + StringRef BoundArch, Action::OffloadKind) const { DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); const OptTable &Opts = getDriver().getOpts(); diff --git a/clang/lib/Driver/ToolChains.cpp b/clang/lib/Driver/ToolChains.cpp index b8fbb30..3b33812 100644 --- a/clang/lib/Driver/ToolChains.cpp +++ b/clang/lib/Driver/ToolChains.cpp @@ -809,7 +809,8 @@ void DarwinClang::AddCCKextLibArgs(const ArgList &Args, } DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args, - StringRef BoundArch) const { + StringRef BoundArch, + Action::OffloadKind) const { DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); const OptTable &Opts = getDriver().getOpts(); @@ -1038,10 +1039,12 @@ void MachO::AddLinkRuntimeLibArgs(const ArgList &Args, AddLinkRuntimeLib(Args, CmdArgs, CompilerRT, false, true); } -DerivedArgList 
*Darwin::TranslateArgs(const DerivedArgList &Args, - StringRef BoundArch) const { +DerivedArgList * +Darwin::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) const { // First get the generic Apple args, before moving onto Darwin-specific ones. - DerivedArgList *DAL = MachO::TranslateArgs(Args, BoundArch); + DerivedArgList *DAL = + MachO::TranslateArgs(Args, BoundArch, DeviceOffloadKind); const OptTable &Opts = getDriver().getOpts(); // If no architecture is bound, none of the translations here are relevant. @@ -2866,6 +2869,49 @@ bool Generic_GCC::addLibStdCXXIncludePaths( return true; } +llvm::opt::DerivedArgList * +Generic_GCC::TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef, + Action::OffloadKind DeviceOffloadKind) const { + + // If this tool chain is used for an OpenMP offloading device we have to make + // sure we always generate a shared library regardless of the commands the + // user passed to the host. This is required because the runtime library + // is required to load the device image dynamically at run time. + if (DeviceOffloadKind == Action::OFK_OpenMP) { + DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); + const OptTable &Opts = getDriver().getOpts(); + + // Request the shared library. Given that these options are decided + // implicitly, they do not refer to any base argument. + DAL->AddFlagArg(/*BaseArg=*/nullptr, Opts.getOption(options::OPT_shared)); + DAL->AddFlagArg(/*BaseArg=*/nullptr, Opts.getOption(options::OPT_fPIC)); + + // Filter out all the arguments we do not want to pass to the offloading + // toolchain, as they can interfere with the creation of a shared library. 
+ for (auto *A : Args) { + switch ((options::ID)A->getOption().getID()) { + default: + DAL->append(A); + break; + case options::OPT_shared: + case options::OPT_dynamic: + case options::OPT_static: + case options::OPT_fPIC: + case options::OPT_fno_PIC: + case options::OPT_fpic: + case options::OPT_fno_pic: + case options::OPT_fPIE: + case options::OPT_fno_PIE: + case options::OPT_fpie: + case options::OPT_fno_pie: + break; + } + } + return DAL; + } + return nullptr; +} + void Generic_ELF::addClangTargetOptions(const ArgList &DriverArgs, ArgStringList &CC1Args) const { const Generic_GCC::GCCVersion &V = GCCInstallation.getVersion(); @@ -5032,7 +5078,7 @@ void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs, llvm::opt::DerivedArgList * CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, - StringRef BoundArch) const { + StringRef BoundArch, Action::OffloadKind) const { DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); const OptTable &Opts = getDriver().getOpts(); diff --git a/clang/lib/Driver/ToolChains.h b/clang/lib/Driver/ToolChains.h index c4c054d..73bfaed 100644 --- a/clang/lib/Driver/ToolChains.h +++ b/clang/lib/Driver/ToolChains.h @@ -222,6 +222,9 @@ public: bool isPIEDefault() const override; bool isPICDefaultForced() const override; bool IsIntegratedAssemblerDefault() const override; + llvm::opt::DerivedArgList * + TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) const override; protected: Tool *getTool(Action::ActionClass AC) const override; @@ -317,8 +320,8 @@ public: bool HasNativeLLVMSupport() const override; llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, - StringRef BoundArch) const override; + TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) const override; bool IsBlocksDefault() const override { // Always allow blocks on Apple; users interested in 
versioning are @@ -522,8 +525,8 @@ public: bool isCrossCompiling() const override { return false; } llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, - StringRef BoundArch) const override; + TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) const override; CXXStdlibType GetDefaultCXXStdlibType() const override; ObjCRuntime getDefaultObjCRuntime(bool isNonFragile) const override; @@ -855,8 +858,8 @@ public: const llvm::opt::ArgList &Args); llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, - StringRef BoundArch) const override; + TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) const override; void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; @@ -1061,8 +1064,8 @@ public: const llvm::opt::ArgList &Args); llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, - StringRef BoundArch) const override; + TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) const override; bool IsIntegratedAssemblerDefault() const override; bool IsUnwindTablesDefault() const override; diff --git a/clang/test/Driver/openmp-offload.c b/clang/test/Driver/openmp-offload.c index 1d40ec1..6ba844c 100644 --- a/clang/test/Driver/openmp-offload.c +++ b/clang/test/Driver/openmp-offload.c @@ -247,24 +247,24 @@ // // Compile for the powerpc device. 
// -// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" // CHK-COMMANDS: ld" {{.*}}"-o" "[[T1BIN]]" {{.*}}"[[T1OBJ]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1PP:.+\.i]]" "-x" "c" "[[INPUT]]" -// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1BC:.+\.bc]]" "-x" "cpp-output" "[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1BC:.+\.bc]]" "-x" "cpp-output" "[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1ASM:.+\.s]]" "-x" "ir" "[[T1BC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "[[T1ASM]]" -// CHK-COMMANDS-ST: ld" {{.*}}"-o" "[[T1BIN]]" {{.*}}[[T1OBJ]] +// CHK-COMMANDS-ST: ld" {{.*}}"-shared" {{.*}}"-o" "[[T1BIN]]" {{.*}}[[T1OBJ]] // // Compile for the x86 device. 
// -// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" // CHK-COMMANDS: ld" {{.*}}"-o" "[[T2BIN]]" {{.*}}"[[T2OBJ]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2PP:.+\.i]]" "-x" "c" "[[INPUT]]" -// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2BC:.+\.bc]]" "-x" "cpp-output" "[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2BC:.+\.bc]]" "-x" "cpp-output" "[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2ASM:.+\.s]]" "-x" "ir" "[[T2BC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "[[T2ASM]]" -// CHK-COMMANDS-ST: ld" {{.*}}"-o" "[[T2BIN]]" {{.*}}[[T2OBJ]] +// CHK-COMMANDS-ST: ld" {{.*}}"-shared" {{.*}}"-o" "[[T2BIN]]" {{.*}}[[T2OBJ]] // // Generate host object from the BC file and link using the linker script. -- 2.7.4