[Driver][OpenMP] Add specialized action builder for OpenMP offloading actions.
authorSamuel Antao <sfantao@us.ibm.com>
Thu, 27 Oct 2016 17:08:03 +0000 (17:08 +0000)
committerSamuel Antao <sfantao@us.ibm.com>
Thu, 27 Oct 2016 17:08:03 +0000 (17:08 +0000)
Summary:
This patch adds a new specialized action builder to create OpenMP offloading actions. The specialized builder is added to the action builder already containing the CUDA specialized builder.

OpenMP offloading dependences between host and device actions (expressed with OffloadActions) are different that what is used for CUDA:
 - Device compile action depends on the host compile action - the device frontend extracts the information about the declarations that have to be emitted by looking into the metadata produced by the host frontend.
 - The host link action depends on the device link actions - the device images are embedded in the host binary at link time.

Reviewers: echristo, tra, rsmith, jlebar, ABataev, hfinkel

Subscribers: mkuron, whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin

Differential Revision: https://reviews.llvm.org/D21845

llvm-svn: 285314

clang/lib/Driver/Driver.cpp
clang/test/Driver/openmp-offload.c

index bdf3e0e..514c000 100644 (file)
@@ -1544,8 +1544,9 @@ class OffloadingActionBuilder final {
     /// added to the provided host action \a HostAction. By default it is
     /// inactive.
     virtual ActionBuilderReturnCode
-    getDeviceDepences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase,
-                      phases::ID FinalPhase, PhasesTy &Phases) {
+    getDeviceDependences(OffloadAction::DeviceDependences &DA,
+                         phases::ID CurPhase, phases::ID FinalPhase,
+                         PhasesTy &Phases) {
       return ABRT_Inactive;
     }
 
@@ -1603,8 +1604,9 @@ class OffloadingActionBuilder final {
         : DeviceActionBuilder(C, Args, Inputs, Action::OFK_Cuda) {}
 
     ActionBuilderReturnCode
-    getDeviceDepences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase,
-                      phases::ID FinalPhase, PhasesTy &Phases) override {
+    getDeviceDependences(OffloadAction::DeviceDependences &DA,
+                         phases::ID CurPhase, phases::ID FinalPhase,
+                         PhasesTy &Phases) override {
       if (!IsActive)
         return ABRT_Inactive;
 
@@ -1828,7 +1830,118 @@ class OffloadingActionBuilder final {
     }
   };
 
-  /// Add the implementation for other specialized builders here.
+  /// OpenMP action builder. The host bitcode is passed to the device frontend
+  /// and all the device linked images are passed to the host link phase.
+  class OpenMPActionBuilder final : public DeviceActionBuilder {
+    /// The OpenMP actions for the current input.
+    ActionList OpenMPDeviceActions;
+
+    /// The linker inputs obtained for each toolchain.
+    SmallVector<ActionList, 8> DeviceLinkerInputs;
+
+  public:
+    OpenMPActionBuilder(Compilation &C, DerivedArgList &Args,
+                        const Driver::InputList &Inputs)
+        : DeviceActionBuilder(C, Args, Inputs, Action::OFK_OpenMP) {}
+
+    ActionBuilderReturnCode
+    getDeviceDependences(OffloadAction::DeviceDependences &DA,
+                         phases::ID CurPhase, phases::ID FinalPhase,
+                         PhasesTy &Phases) override {
+
+      // We should always have an action for each input.
+      assert(OpenMPDeviceActions.size() == ToolChains.size() &&
+             "Number of OpenMP actions and toolchains do not match.");
+
+      // The host only depends on device action in the linking phase, when all
+      // the device images have to be embedded in the host image.
+      if (CurPhase == phases::Link) {
+        assert(ToolChains.size() == DeviceLinkerInputs.size() &&
+               "Toolchains and linker inputs sizes do not match.");
+        auto LI = DeviceLinkerInputs.begin();
+        for (auto *A : OpenMPDeviceActions) {
+          LI->push_back(A);
+          ++LI;
+        }
+
+        // We passed the device action as a host dependence, so we don't need to
+        // do anything else with them.
+        OpenMPDeviceActions.clear();
+        return ABRT_Success;
+      }
+
+      // By default, we produce an action for each device arch.
+      for (Action *&A : OpenMPDeviceActions)
+        A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);
+
+      return ABRT_Success;
+    }
+
+    ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override {
+
+      // If this is an input action replicate it for each OpenMP toolchain.
+      if (auto *IA = dyn_cast<InputAction>(HostAction)) {
+        OpenMPDeviceActions.clear();
+        for (unsigned I = 0; I < ToolChains.size(); ++I)
+          OpenMPDeviceActions.push_back(
+              C.MakeAction<InputAction>(IA->getInputArg(), IA->getType()));
+        return ABRT_Success;
+      }
+
+      // When generating code for OpenMP we use the host compile phase result as
+      // a dependence to the device compile phase so that it can learn what
+      // declarations should be emitted. However, this is not the only use for
+      // the host action, so we prevent it from being collapsed.
+      if (isa<CompileJobAction>(HostAction)) {
+        HostAction->setCannotBeCollapsedWithNextDependentAction();
+        assert(ToolChains.size() == OpenMPDeviceActions.size() &&
+               "Toolchains and device action sizes do not match.");
+        OffloadAction::HostDependence HDep(
+            *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
+            /*BoundArch=*/nullptr, Action::OFK_OpenMP);
+        auto TC = ToolChains.begin();
+        for (Action *&A : OpenMPDeviceActions) {
+          assert(isa<CompileJobAction>(A));
+          OffloadAction::DeviceDependences DDep;
+          DDep.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP);
+          A = C.MakeAction<OffloadAction>(HDep, DDep);
+          ++TC;
+        }
+      }
+      return ABRT_Success;
+    }
+
+    void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {
+      assert(ToolChains.size() == DeviceLinkerInputs.size() &&
+             "Toolchains and linker inputs sizes do not match.");
+
+      // Append a new link action for each device.
+      auto TC = ToolChains.begin();
+      for (auto &LI : DeviceLinkerInputs) {
+        auto *DeviceLinkAction =
+            C.MakeAction<LinkJobAction>(LI, types::TY_Image);
+        DA.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr,
+               Action::OFK_OpenMP);
+        ++TC;
+      }
+    }
+
+    bool initialize() override {
+      // Get the OpenMP toolchains. If we don't get any, the action builder will
+      // know there is nothing to do related to OpenMP offloading.
+      auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>();
+      for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE;
+           ++TI)
+        ToolChains.push_back(TI->second);
+
+      DeviceLinkerInputs.resize(ToolChains.size());
+      return false;
+    }
+  };
+
+  ///
+  /// TODO: Add the implementation for other specialized builders here.
+  ///
 
   /// Specialized builders being used by this offloading action builder.
   SmallVector<DeviceActionBuilder *, 4> SpecializedBuilders;
@@ -1844,6 +1957,9 @@ public:
     // Create a specialized builder for CUDA.
     SpecializedBuilders.push_back(new CudaActionBuilder(C, Args, Inputs));
 
+    // Create a specialized builder for OpenMP.
+    SpecializedBuilders.push_back(new OpenMPActionBuilder(C, Args, Inputs));
+
     //
     // TODO: Build other specialized builders here.
     //
@@ -1886,7 +2002,8 @@ public:
         continue;
       }
 
-      auto RetCode = SB->getDeviceDepences(DDeps, CurPhase, FinalPhase, Phases);
+      auto RetCode =
+          SB->getDeviceDependences(DDeps, CurPhase, FinalPhase, Phases);
 
       // If the builder explicitly says the host action should be ignored,
       // we need to increment the variable that tracks the builders that request
index 8faa18b..4781bdb 100644 (file)
@@ -2,6 +2,11 @@
 /// Perform several driver tests for OpenMP offloading
 ///
 
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: powerpc-registered-target
+// REQUIRES: nvptx-registered-target
+
 /// ###########################################################################
 
 /// Check whether an invalid OpenMP target is specified:
 // RUN:   %clang -### -ccc-print-phases -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu,powerpc64le-ibm-linux-gnu  %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-DUPLICATES %s
 // CHK-DUPLICATES: warning: The OpenMP offloading target 'powerpc64le-ibm-linux-gnu' is similar to target 'powerpc64le-ibm-linux-gnu' already specified - will be ignored.
+
+/// ###########################################################################
+
+/// Check the phases graph when using a single target, different from the host.
+/// We should have an offload action joining the host compile and device
+/// preprocessor and another one joining the device linking outputs to the host
+/// action.
+// RUN:   %clang -ccc-print-phases -fopenmp -target powerpc64le-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-PHASES %s
+// CHK-PHASES: 0: input, "[[INPUT:.+\.c]]", c, (host-openmp)
+// CHK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp)
+// CHK-PHASES: 2: compiler, {1}, ir, (host-openmp)
+// CHK-PHASES: 3: backend, {2}, assembler, (host-openmp)
+// CHK-PHASES: 4: assembler, {3}, object, (host-openmp)
+// CHK-PHASES: 5: linker, {4}, image, (host-openmp)
+// CHK-PHASES: 6: input, "[[INPUT]]", c, (device-openmp)
+// CHK-PHASES: 7: preprocessor, {6}, cpp-output, (device-openmp)
+// CHK-PHASES: 8: compiler, {7}, ir, (device-openmp)
+// CHK-PHASES: 9: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {8}, ir
+// CHK-PHASES: 10: backend, {9}, assembler, (device-openmp)
+// CHK-PHASES: 11: assembler, {10}, object, (device-openmp)
+// CHK-PHASES: 12: linker, {11}, image, (device-openmp)
+// CHK-PHASES: 13: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {5}, "device-openmp (x86_64-pc-linux-gnu)" {12}, image
+
+/// ###########################################################################
+
+/// Check the phases when using multiple targets. Here we also add a library to
+/// make sure it is treated as input by the device.
+// RUN:   %clang -ccc-print-phases -lsomelib -fopenmp -target powerpc64-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-PHASES-LIB %s
+// CHK-PHASES-LIB: 0: input, "somelib", object, (host-openmp)
+// CHK-PHASES-LIB: 1: input, "[[INPUT:.+\.c]]", c, (host-openmp)
+// CHK-PHASES-LIB: 2: preprocessor, {1}, cpp-output, (host-openmp)
+// CHK-PHASES-LIB: 3: compiler, {2}, ir, (host-openmp)
+// CHK-PHASES-LIB: 4: backend, {3}, assembler, (host-openmp)
+// CHK-PHASES-LIB: 5: assembler, {4}, object, (host-openmp)
+// CHK-PHASES-LIB: 6: linker, {0, 5}, image, (host-openmp)
+// CHK-PHASES-LIB: 7: input, "somelib", object, (device-openmp)
+// CHK-PHASES-LIB: 8: input, "[[INPUT]]", c, (device-openmp)
+// CHK-PHASES-LIB: 9: preprocessor, {8}, cpp-output, (device-openmp)
+// CHK-PHASES-LIB: 10: compiler, {9}, ir, (device-openmp)
+// CHK-PHASES-LIB: 11: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {10}, ir
+// CHK-PHASES-LIB: 12: backend, {11}, assembler, (device-openmp)
+// CHK-PHASES-LIB: 13: assembler, {12}, object, (device-openmp)
+// CHK-PHASES-LIB: 14: linker, {7, 13}, image, (device-openmp)
+// CHK-PHASES-LIB: 15: input, "somelib", object, (device-openmp)
+// CHK-PHASES-LIB: 16: input, "[[INPUT]]", c, (device-openmp)
+// CHK-PHASES-LIB: 17: preprocessor, {16}, cpp-output, (device-openmp)
+// CHK-PHASES-LIB: 18: compiler, {17}, ir, (device-openmp)
+// CHK-PHASES-LIB: 19: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (powerpc64-ibm-linux-gnu)" {18}, ir
+// CHK-PHASES-LIB: 20: backend, {19}, assembler, (device-openmp)
+// CHK-PHASES-LIB: 21: assembler, {20}, object, (device-openmp)
+// CHK-PHASES-LIB: 22: linker, {15, 21}, image, (device-openmp)
+// CHK-PHASES-LIB: 23: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {6}, "device-openmp (x86_64-pc-linux-gnu)" {14}, "device-openmp (powerpc64-ibm-linux-gnu)" {22}, image
+
+
+/// ###########################################################################
+
+/// Check the phases when using multiple targets and multiple source files
+// RUN:   echo " " > %t.c
+// RUN:   %clang -ccc-print-phases -lsomelib -fopenmp -target powerpc64-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s %t.c 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-PHASES-FILES %s
+// CHK-PHASES-FILES: 0: input, "somelib", object, (host-openmp)
+// CHK-PHASES-FILES: 1: input, "[[INPUT1:.+\.c]]", c, (host-openmp)
+// CHK-PHASES-FILES: 2: preprocessor, {1}, cpp-output, (host-openmp)
+// CHK-PHASES-FILES: 3: compiler, {2}, ir, (host-openmp)
+// CHK-PHASES-FILES: 4: backend, {3}, assembler, (host-openmp)
+// CHK-PHASES-FILES: 5: assembler, {4}, object, (host-openmp)
+// CHK-PHASES-FILES: 6: input, "[[INPUT2:.+\.c]]", c, (host-openmp)
+// CHK-PHASES-FILES: 7: preprocessor, {6}, cpp-output, (host-openmp)
+// CHK-PHASES-FILES: 8: compiler, {7}, ir, (host-openmp)
+// CHK-PHASES-FILES: 9: backend, {8}, assembler, (host-openmp)
+// CHK-PHASES-FILES: 10: assembler, {9}, object, (host-openmp)
+// CHK-PHASES-FILES: 11: linker, {0, 5, 10}, image, (host-openmp)
+// CHK-PHASES-FILES: 12: input, "somelib", object, (device-openmp)
+// CHK-PHASES-FILES: 13: input, "[[INPUT1]]", c, (device-openmp)
+// CHK-PHASES-FILES: 14: preprocessor, {13}, cpp-output, (device-openmp)
+// CHK-PHASES-FILES: 15: compiler, {14}, ir, (device-openmp)
+// CHK-PHASES-FILES: 16: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {15}, ir
+// CHK-PHASES-FILES: 17: backend, {16}, assembler, (device-openmp)
+// CHK-PHASES-FILES: 18: assembler, {17}, object, (device-openmp)
+// CHK-PHASES-FILES: 19: input, "[[INPUT2]]", c, (device-openmp)
+// CHK-PHASES-FILES: 20: preprocessor, {19}, cpp-output, (device-openmp)
+// CHK-PHASES-FILES: 21: compiler, {20}, ir, (device-openmp)
+// CHK-PHASES-FILES: 22: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {8}, "device-openmp (x86_64-pc-linux-gnu)" {21}, ir
+// CHK-PHASES-FILES: 23: backend, {22}, assembler, (device-openmp)
+// CHK-PHASES-FILES: 24: assembler, {23}, object, (device-openmp)
+// CHK-PHASES-FILES: 25: linker, {12, 18, 24}, image, (device-openmp)
+// CHK-PHASES-FILES: 26: input, "somelib", object, (device-openmp)
+// CHK-PHASES-FILES: 27: input, "[[INPUT1]]", c, (device-openmp)
+// CHK-PHASES-FILES: 28: preprocessor, {27}, cpp-output, (device-openmp)
+// CHK-PHASES-FILES: 29: compiler, {28}, ir, (device-openmp)
+// CHK-PHASES-FILES: 30: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (powerpc64-ibm-linux-gnu)" {29}, ir
+// CHK-PHASES-FILES: 31: backend, {30}, assembler, (device-openmp)
+// CHK-PHASES-FILES: 32: assembler, {31}, object, (device-openmp)
+// CHK-PHASES-FILES: 33: input, "[[INPUT2]]", c, (device-openmp)
+// CHK-PHASES-FILES: 34: preprocessor, {33}, cpp-output, (device-openmp)
+// CHK-PHASES-FILES: 35: compiler, {34}, ir, (device-openmp)
+// CHK-PHASES-FILES: 36: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {8}, "device-openmp (powerpc64-ibm-linux-gnu)" {35}, ir
+// CHK-PHASES-FILES: 37: backend, {36}, assembler, (device-openmp)
+// CHK-PHASES-FILES: 38: assembler, {37}, object, (device-openmp)
+// CHK-PHASES-FILES: 39: linker, {26, 32, 38}, image, (device-openmp)
+// CHK-PHASES-FILES: 40: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {11}, "device-openmp (x86_64-pc-linux-gnu)" {25}, "device-openmp (powerpc64-ibm-linux-gnu)" {39}, image
+
+/// ###########################################################################
+
+/// Check the phases graph when using a single GPU target, and check the OpenMP
+/// and CUDA phases are articulated correctly.
+// RUN:   %clang -ccc-print-phases -fopenmp -target powerpc64le-ibm-linux-gnu -fopenmp-targets=nvptx64-nvidia-cuda -x cuda %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-PHASES-WITH-CUDA %s
+// CHK-PHASES-WITH-CUDA: 0: input, "[[INPUT:.+\.c]]", cuda, (host-cuda-openmp)
+// CHK-PHASES-WITH-CUDA: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda-openmp)
+// CHK-PHASES-WITH-CUDA: 2: compiler, {1}, ir, (host-cuda-openmp)
+// CHK-PHASES-WITH-CUDA: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_20)
+// CHK-PHASES-WITH-CUDA: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_20)
+// CHK-PHASES-WITH-CUDA: 5: compiler, {4}, ir, (device-cuda, sm_20)
+// CHK-PHASES-WITH-CUDA: 6: backend, {5}, assembler, (device-cuda, sm_20)
+// CHK-PHASES-WITH-CUDA: 7: assembler, {6}, object, (device-cuda, sm_20)
+// CHK-PHASES-WITH-CUDA: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_20)" {7}, object
+// CHK-PHASES-WITH-CUDA: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_20)" {6}, assembler
+// CHK-PHASES-WITH-CUDA: 10: linker, {8, 9}, cuda-fatbin, (device-cuda)
+// CHK-PHASES-WITH-CUDA: 11: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {10}, ir
+// CHK-PHASES-WITH-CUDA: 12: backend, {11}, assembler, (host-cuda-openmp)
+// CHK-PHASES-WITH-CUDA: 13: assembler, {12}, object, (host-cuda-openmp)
+// CHK-PHASES-WITH-CUDA: 14: linker, {13}, image, (host-cuda-openmp)
+// CHK-PHASES-WITH-CUDA: 15: input, "[[INPUT]]", cuda, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 16: preprocessor, {15}, cuda-cpp-output, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 17: compiler, {16}, ir, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 18: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda)" {17}, ir
+// CHK-PHASES-WITH-CUDA: 19: backend, {18}, assembler, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 20: assembler, {19}, object, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 21: linker, {20}, image, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 22: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {14}, "device-openmp (nvptx64-nvidia-cuda)" {21}, image