From bcc77b6249a6f193f7322dbb3fdf77482d67bbee Mon Sep 17 00:00:00 2001 From: Jason Henline Date: Wed, 24 Aug 2016 21:31:53 +0000 Subject: [PATCH] [StreamExecutor] Rename Executor to Device Summary: This more clearly describes what the class is. Reviewers: jlebar Subscribers: jprice, parallel_libs-commits Differential Revision: https://reviews.llvm.org/D23851 llvm-svn: 279669 --- .../streamexecutor/{Executor.h => Device.h} | 46 +- .../streamexecutor/include/streamexecutor/Kernel.h | 22 +- .../include/streamexecutor/PlatformInterfaces.h | 15 +- .../streamexecutor/include/streamexecutor/Stream.h | 41 +- parallel-libs/streamexecutor/lib/CMakeLists.txt | 2 +- .../lib/{Executor.cpp => Device.cpp} | 16 +- parallel-libs/streamexecutor/lib/Kernel.cpp | 12 +- .../streamexecutor/lib/PlatformInterfaces.cpp | 2 +- parallel-libs/streamexecutor/lib/Stream.cpp | 3 +- .../streamexecutor/lib/unittests/CMakeLists.txt | 8 +- .../streamexecutor/lib/unittests/DeviceTest.cpp | 476 ++++++++++++++++++++ .../streamexecutor/lib/unittests/ExecutorTest.cpp | 478 --------------------- .../streamexecutor/lib/unittests/KernelTest.cpp | 20 +- .../streamexecutor/lib/unittests/StreamTest.cpp | 14 +- 14 files changed, 575 insertions(+), 580 deletions(-) rename parallel-libs/streamexecutor/include/streamexecutor/{Executor.h => Device.h} (89%) rename parallel-libs/streamexecutor/lib/{Executor.cpp => Device.cpp} (67%) create mode 100644 parallel-libs/streamexecutor/lib/unittests/DeviceTest.cpp delete mode 100644 parallel-libs/streamexecutor/lib/unittests/ExecutorTest.cpp diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Executor.h b/parallel-libs/streamexecutor/include/streamexecutor/Device.h similarity index 89% rename from parallel-libs/streamexecutor/include/streamexecutor/Executor.h rename to parallel-libs/streamexecutor/include/streamexecutor/Device.h index 6b0bc18..34bba80 100644 --- a/parallel-libs/streamexecutor/include/streamexecutor/Executor.h +++ b/parallel-libs/streamexecutor/include/streamexecutor/Device.h @@ -1,4 +1,4 @@ -//===-- Executor.h - The Executor class -------------------------*- C++ -*-===// +//===-- Device.h - The Device class -----------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -8,12 +8,12 @@ //===----------------------------------------------------------------------===// /// /// \file -/// The Executor class which represents a single device of a specific platform. +/// The Device class which represents a single device of a specific platform. /// //===----------------------------------------------------------------------===// -#ifndef STREAMEXECUTOR_EXECUTOR_H -#define STREAMEXECUTOR_EXECUTOR_H +#ifndef STREAMEXECUTOR_DEVICE_H +#define STREAMEXECUTOR_DEVICE_H #include "streamexecutor/KernelSpec.h" #include "streamexecutor/PlatformInterfaces.h" @@ -24,10 +24,10 @@ namespace streamexecutor { class KernelInterface; class Stream; -class Executor { +class Device { public: - explicit Executor(PlatformExecutor *PExecutor); - virtual ~Executor(); + explicit Device(PlatformDevice *PDevice); + virtual ~Device(); /// Gets the kernel implementation for the underlying platform. virtual Expected> @@ -42,7 +42,7 @@ public: template Expected> allocateDeviceMemory(size_t ElementCount) { Expected MaybeBase = - PExecutor->allocateDeviceMemory(ElementCount * sizeof(T)); + PDevice->allocateDeviceMemory(ElementCount * sizeof(T)); if (!MaybeBase) return MaybeBase.takeError(); return GlobalDeviceMemory(*MaybeBase); @@ -50,7 +50,7 @@ public: /// Frees memory previously allocated with allocateDeviceMemory. template Error freeDeviceMemory(GlobalDeviceMemory Memory) { - return PExecutor->freeDeviceMemory(Memory); + return PDevice->freeDeviceMemory(Memory); } /// Allocates an array of ElementCount entries of type T in host memory. @@ -59,7 +59,7 @@ public: /// copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D. template Expected allocateHostMemory(size_t ElementCount) { Expected MaybeMemory = - PExecutor->allocateHostMemory(ElementCount * sizeof(T)); + PDevice->allocateHostMemory(ElementCount * sizeof(T)); if (!MaybeMemory) return MaybeMemory.takeError(); return static_cast(*MaybeMemory); @@ -67,7 +67,7 @@ public: /// Frees memory previously allocated with allocateHostMemory. template Error freeHostMemory(T *Memory) { - return PExecutor->freeHostMemory(Memory); + return PDevice->freeHostMemory(Memory); } /// Registers a previously allocated host array of type T for asynchronous @@ -77,15 +77,15 @@ public: /// memory copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D. template Error registerHostMemory(T *Memory, size_t ElementCount) { - return PExecutor->registerHostMemory(Memory, ElementCount * sizeof(T)); + return PDevice->registerHostMemory(Memory, ElementCount * sizeof(T)); } /// Unregisters host memory previously registered by registerHostMemory. template Error unregisterHostMemory(T *Memory) { - return PExecutor->unregisterHostMemory(Memory); + return PDevice->unregisterHostMemory(Memory); } - /// \anchor ExecutorHostSyncCopyGroup + /// \anchor DeviceHostSyncCopyGroup /// \name Host-synchronous device memory copying functions /// /// These methods block the calling host thread while copying data to or from @@ -125,9 +125,9 @@ public: return make_error( "copying too many elements, " + llvm::Twine(ElementCount) + ", to a host array of element count " + llvm::Twine(Dst.size())); - return PExecutor->synchronousCopyD2H( - Src.getBaseMemory(), Src.getElementOffset() * sizeof(T), Dst.data(), 0, - ElementCount * sizeof(T)); + return PDevice->synchronousCopyD2H(Src.getBaseMemory(), + Src.getElementOffset() * sizeof(T), + Dst.data(), 0, ElementCount * sizeof(T)); } template @@ -179,9 +179,9 @@ public: llvm::Twine(ElementCount) + ", to a device array of element count " + llvm::Twine(Dst.getElementCount())); - return PExecutor->synchronousCopyH2D(Src.data(), 0, Dst.getBaseMemory(), - Dst.getElementOffset() * sizeof(T), - ElementCount * sizeof(T)); + return PDevice->synchronousCopyH2D(Src.data(), 0, Dst.getBaseMemory(), + Dst.getElementOffset() * sizeof(T), + ElementCount * sizeof(T)); } template @@ -234,7 +234,7 @@ public: llvm::Twine(ElementCount) + ", to a device array of element count " + llvm::Twine(Dst.getElementCount())); - return PExecutor->synchronousCopyD2D( + return PDevice->synchronousCopyD2D( Src.getBaseMemory(), Src.getElementOffset() * sizeof(T), Dst.getBaseMemory(), Dst.getElementOffset() * sizeof(T), ElementCount * sizeof(T)); @@ -292,9 +292,9 @@ public: ///@} End host-synchronous device memory copying functions private: - PlatformExecutor *PExecutor; + PlatformDevice *PDevice; }; } // namespace streamexecutor -#endif // STREAMEXECUTOR_EXECUTOR_H +#endif // STREAMEXECUTOR_DEVICE_H diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Kernel.h b/parallel-libs/streamexecutor/include/streamexecutor/Kernel.h index b817162..4a2eeb4 100644 --- a/parallel-libs/streamexecutor/include/streamexecutor/Kernel.h +++ b/parallel-libs/streamexecutor/include/streamexecutor/Kernel.h @@ -54,13 +54,13 @@ /// function as follows: /// \code /// namespace ccn = compiler_cuda_namespace; -/// // Assumes Executor is a pointer to the StreamExecutor on which to -/// // launch the kernel. +/// // Assumes Device is a pointer to the Device on which to launch the +/// // kernel. /// // /// // See KernelSpec.h for details on how the compiler can create a /// // MultiKernelLoaderSpec instance like SaxpyKernelLoaderSpec below. /// Expected MaybeKernel = -/// ccn::SaxpyKernel::create(Executor, ccn::SaxpyKernelLoaderSpec); +/// ccn::SaxpyKernel::create(Device, ccn::SaxpyKernelLoaderSpec); /// if (!MaybeKernel) { /* Handle error */ } /// ccn::SaxpyKernel SaxpyKernel = *MaybeKernel; /// Launch(SaxpyKernel, A, X, Y); @@ -84,7 +84,7 @@ namespace streamexecutor { -class Executor; +class Device; class KernelInterface; /// The base class for device kernel functions. @@ -100,13 +100,13 @@ public: KernelBase &operator=(KernelBase &&) = default; ~KernelBase(); - /// Creates a kernel object from an Executor and a MultiKernelLoaderSpec. + /// Creates a kernel object from a Device and a MultiKernelLoaderSpec. /// - /// The Executor knows which platform it belongs to and the + /// The Device knows which platform it belongs to and the /// MultiKernelLoaderSpec knows how to find the kernel code for different /// platforms, so the combined information is enough to get the kernel code /// for the appropriate platform. - static Expected create(Executor *ParentExecutor, + static Expected create(Device *Dev, const MultiKernelLoaderSpec &Spec); const std::string &getName() const { return Name; } @@ -116,11 +116,11 @@ public: KernelInterface *getImplementation() { return Implementation.get(); } private: - KernelBase(Executor *ParentExecutor, const std::string &Name, + KernelBase(Device *Dev, const std::string &Name, const std::string &DemangledName, std::unique_ptr Implementation); - Executor *ParentExecutor; + Device *TheDevice; std::string Name; std::string DemangledName; std::unique_ptr Implementation; @@ -136,9 +136,9 @@ public: TypedKernel &operator=(TypedKernel &&) = default; /// Parameters here have the same meaning as in KernelBase::create. - static Expected create(Executor *ParentExecutor, + static Expected create(Device *Dev, const MultiKernelLoaderSpec &Spec) { - auto MaybeBase = KernelBase::create(ParentExecutor, Spec); + auto MaybeBase = KernelBase::create(Dev, Spec); if (!MaybeBase) { return MaybeBase.takeError(); } diff --git a/parallel-libs/streamexecutor/include/streamexecutor/PlatformInterfaces.h b/parallel-libs/streamexecutor/include/streamexecutor/PlatformInterfaces.h index 2c8fce3..b7737e8 100644 --- a/parallel-libs/streamexecutor/include/streamexecutor/PlatformInterfaces.h +++ b/parallel-libs/streamexecutor/include/streamexecutor/PlatformInterfaces.h @@ -31,7 +31,7 @@ namespace streamexecutor { -class PlatformExecutor; +class PlatformDevice; /// Methods supported by device kernel function objects on all platforms. class KernelInterface { @@ -41,15 +41,14 @@ class KernelInterface { /// Platform-specific stream handle. class PlatformStreamHandle { public: - explicit PlatformStreamHandle(PlatformExecutor *PExecutor) - : PExecutor(PExecutor) {} + explicit PlatformStreamHandle(PlatformDevice *PDevice) : PDevice(PDevice) {} virtual ~PlatformStreamHandle(); - PlatformExecutor *getExecutor() { return PExecutor; } + PlatformDevice *getDevice() { return PDevice; } private: - PlatformExecutor *PExecutor; + PlatformDevice *PDevice; }; /// Raw executor methods that must be implemented by each platform. @@ -57,11 +56,11 @@ private: /// This class defines the platform interface that supports executing work on a /// device. /// -/// The public Executor and Stream classes have the type-safe versions of the +/// The public Device and Stream classes have the type-safe versions of the /// functions in this interface. -class PlatformExecutor { +class PlatformDevice { public: - virtual ~PlatformExecutor(); + virtual ~PlatformDevice(); virtual std::string getName() const = 0; diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Stream.h b/parallel-libs/streamexecutor/include/streamexecutor/Stream.h index 3293053..0e6e898 100644 --- a/parallel-libs/streamexecutor/include/streamexecutor/Stream.h +++ b/parallel-libs/streamexecutor/include/streamexecutor/Stream.h @@ -12,19 +12,18 @@ /// A Stream instance represents a queue of sequential, host-asynchronous work /// to be performed on a device. /// -/// To enqueue work on a device, first create a Executor instance for a -/// given device and then use that Executor to create a Stream instance. -/// The Stream instance will perform its work on the device managed by the -/// Executor that created it. +/// To enqueue work on a device, first create a Device instance then use that +/// Device to create a Stream instance. The Stream instance will perform its +/// work on the device managed by the Device object that created it. /// /// The various "then" methods of the Stream object, such as thenCopyH2D and /// thenLaunch, may be used to enqueue work on the Stream, and the /// blockHostUntilDone() method may be used to block the host code until the /// Stream has completed all its work. /// -/// Multiple Stream instances can be created for the same Executor. This -/// allows several independent streams of computation to be performed -/// simultaneously on a single device. +/// Multiple Stream instances can be created for the same Device. This allows +/// several independent streams of computation to be performed simultaneously on +/// a single device. /// //===----------------------------------------------------------------------===// @@ -94,8 +93,8 @@ public: const ParameterTs &... Arguments) { auto ArgumentArray = make_kernel_argument_pack(Arguments...); - setError(PExecutor->launch(ThePlatformStream.get(), BlockSize, GridSize, - Kernel, ArgumentArray)); + setError(PDevice->launch(ThePlatformStream.get(), BlockSize, GridSize, + Kernel, ArgumentArray)); return *this; } @@ -105,13 +104,13 @@ public: /// return without waiting for the operation to complete. /// /// Any host memory used as a source or destination for one of these - /// operations must be allocated with Executor::allocateHostMemory or - /// registered with Executor::registerHostMemory. Otherwise, the enqueuing - /// operation may block until the copy operation is fully complete. + /// operations must be allocated with Device::allocateHostMemory or registered + /// with Device::registerHostMemory. Otherwise, the enqueuing operation may + /// block until the copy operation is fully complete. /// /// The arguments and bounds checking for these methods match the API of the - /// \ref ExecutorHostSyncCopyGroup - /// "host-synchronous device memory copying functions" of Executor. + /// \ref DeviceHostSyncCopyGroup + /// "host-synchronous device memory copying functions" of Device. ///@{ template @@ -125,9 +124,9 @@ public: setError("copying too many elements, " + llvm::Twine(ElementCount) + ", to a host array of element count " + llvm::Twine(Dst.size())); else - setError(PExecutor->copyD2H(ThePlatformStream.get(), Src.getBaseMemory(), - Src.getElementOffset() * sizeof(T), - Dst.data(), 0, ElementCount * sizeof(T))); + setError(PDevice->copyD2H(ThePlatformStream.get(), Src.getBaseMemory(), + Src.getElementOffset() * sizeof(T), Dst.data(), + 0, ElementCount * sizeof(T))); return *this; } @@ -182,7 +181,7 @@ public: ", to a device array of element count " + llvm::Twine(Dst.getElementCount())); else - setError(PExecutor->copyH2D( + setError(PDevice->copyH2D( ThePlatformStream.get(), Src.data(), 0, Dst.getBaseMemory(), Dst.getElementOffset() * sizeof(T), ElementCount * sizeof(T))); return *this; @@ -238,7 +237,7 @@ public: ", to a device array of element count " + llvm::Twine(Dst.getElementCount())); else - setError(PExecutor->copyD2D( + setError(PDevice->copyD2D( ThePlatformStream.get(), Src.getBaseMemory(), Src.getElementOffset() * sizeof(T), Dst.getBaseMemory(), Dst.getElementOffset() * sizeof(T), ElementCount * sizeof(T))); @@ -322,8 +321,8 @@ private: ErrorMessage = Message.str(); } - /// The PlatformExecutor that supports the operations of this stream. - PlatformExecutor *PExecutor; + /// The PlatformDevice that supports the operations of this stream. + PlatformDevice *PDevice; /// The platform-specific stream handle for this instance. std::unique_ptr ThePlatformStream; diff --git a/parallel-libs/streamexecutor/lib/CMakeLists.txt b/parallel-libs/streamexecutor/lib/CMakeLists.txt index 7f5cb20..cf7baf9 100644 --- a/parallel-libs/streamexecutor/lib/CMakeLists.txt +++ b/parallel-libs/streamexecutor/lib/CMakeLists.txt @@ -6,7 +6,7 @@ add_library( add_library( streamexecutor $ - Executor.cpp + Device.cpp Kernel.cpp KernelSpec.cpp PackedKernelArgumentArray.cpp diff --git a/parallel-libs/streamexecutor/lib/Executor.cpp b/parallel-libs/streamexecutor/lib/Device.cpp similarity index 67% rename from parallel-libs/streamexecutor/lib/Executor.cpp rename to parallel-libs/streamexecutor/lib/Device.cpp index f103a76..4a5ec11 100644 --- a/parallel-libs/streamexecutor/lib/Executor.cpp +++ b/parallel-libs/streamexecutor/lib/Device.cpp @@ -1,4 +1,4 @@ -//===-- Executor.cpp - Executor implementation ----------------------------===// +//===-- Device.cpp - Device implementation --------------------------------===// // // The LLVM Compiler Infrastructure // @@ -8,11 +8,11 @@ //===----------------------------------------------------------------------===// /// /// \file -/// Implementation of Executor class internals. +/// Implementation of Device class internals. /// //===----------------------------------------------------------------------===// -#include "streamexecutor/Executor.h" +#include "streamexecutor/Device.h" #include @@ -23,17 +23,17 @@ namespace streamexecutor { -Executor::Executor(PlatformExecutor *PExecutor) : PExecutor(PExecutor) {} +Device::Device(PlatformDevice *PDevice) : PDevice(PDevice) {} -Executor::~Executor() = default; +Device::~Device() = default; -Expected> Executor::createStream() { +Expected> Device::createStream() { Expected> MaybePlatformStream = - PExecutor->createStream(); + PDevice->createStream(); if (!MaybePlatformStream) { return MaybePlatformStream.takeError(); } - assert((*MaybePlatformStream)->getExecutor() == PExecutor && + assert((*MaybePlatformStream)->getDevice() == PDevice && "an executor created a stream with a different stored executor"); return llvm::make_unique(std::move(*MaybePlatformStream)); } diff --git a/parallel-libs/streamexecutor/lib/Kernel.cpp b/parallel-libs/streamexecutor/lib/Kernel.cpp index 9e99e91..fa09920 100644 --- a/parallel-libs/streamexecutor/lib/Kernel.cpp +++ b/parallel-libs/streamexecutor/lib/Kernel.cpp @@ -13,31 +13,31 @@ //===----------------------------------------------------------------------===// #include "streamexecutor/Kernel.h" -#include "streamexecutor/Executor.h" +#include "streamexecutor/Device.h" #include "streamexecutor/PlatformInterfaces.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" namespace streamexecutor { -KernelBase::KernelBase(Executor *ParentExecutor, const std::string &Name, +KernelBase::KernelBase(Device *Dev, const std::string &Name, const std::string &DemangledName, std::unique_ptr Implementation) - : ParentExecutor(ParentExecutor), Name(Name), DemangledName(DemangledName), + : TheDevice(Dev), Name(Name), DemangledName(DemangledName), Implementation(std::move(Implementation)) {} KernelBase::~KernelBase() = default; -Expected KernelBase::create(Executor *ParentExecutor, +Expected KernelBase::create(Device *Dev, const MultiKernelLoaderSpec &Spec) { - auto MaybeImplementation = ParentExecutor->getKernelImplementation(Spec); + auto MaybeImplementation = Dev->getKernelImplementation(Spec); if (!MaybeImplementation) { return MaybeImplementation.takeError(); } std::string Name = Spec.getKernelName(); std::string DemangledName = llvm::symbolize::LLVMSymbolizer::DemangleName(Name, nullptr); - KernelBase Instance(ParentExecutor, Name, DemangledName, + KernelBase Instance(Dev, Name, DemangledName, std::move(*MaybeImplementation)); return std::move(Instance); } diff --git a/parallel-libs/streamexecutor/lib/PlatformInterfaces.cpp b/parallel-libs/streamexecutor/lib/PlatformInterfaces.cpp index e0ae644..770cd170 100644 --- a/parallel-libs/streamexecutor/lib/PlatformInterfaces.cpp +++ b/parallel-libs/streamexecutor/lib/PlatformInterfaces.cpp @@ -18,6 +18,6 @@ namespace streamexecutor { PlatformStreamHandle::~PlatformStreamHandle() = default; -PlatformExecutor::~PlatformExecutor() = default; +PlatformDevice::~PlatformDevice() = default; } // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/Stream.cpp b/parallel-libs/streamexecutor/lib/Stream.cpp index 40f52f9..20a817c 100644 --- a/parallel-libs/streamexecutor/lib/Stream.cpp +++ b/parallel-libs/streamexecutor/lib/Stream.cpp @@ -17,8 +17,7 @@ namespace streamexecutor { Stream::Stream(std::unique_ptr PStream) - : PExecutor(PStream->getExecutor()), ThePlatformStream(std::move(PStream)) { -} + : PDevice(PStream->getDevice()), ThePlatformStream(std::move(PStream)) {} Stream::~Stream() = default; diff --git a/parallel-libs/streamexecutor/lib/unittests/CMakeLists.txt b/parallel-libs/streamexecutor/lib/unittests/CMakeLists.txt index 244312f..3b414e3 100644 --- a/parallel-libs/streamexecutor/lib/unittests/CMakeLists.txt +++ b/parallel-libs/streamexecutor/lib/unittests/CMakeLists.txt @@ -1,12 +1,12 @@ add_executable( - executor_test - ExecutorTest.cpp) + device_test + DeviceTest.cpp) target_link_libraries( - executor_test + device_test streamexecutor ${GTEST_BOTH_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) -add_test(ExecutorTest executor_test) +add_test(DeviceTest device_test) add_executable( kernel_test diff --git a/parallel-libs/streamexecutor/lib/unittests/DeviceTest.cpp b/parallel-libs/streamexecutor/lib/unittests/DeviceTest.cpp new file mode 100644 index 0000000..cb34b8b --- /dev/null +++ b/parallel-libs/streamexecutor/lib/unittests/DeviceTest.cpp @@ -0,0 +1,476 @@ +//===-- DeviceTest.cpp - Tests for Device ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the unit tests for Device code. +/// +//===----------------------------------------------------------------------===// + +#include +#include + +#include "streamexecutor/Device.h" +#include "streamexecutor/PlatformInterfaces.h" + +#include "gtest/gtest.h" + +namespace { + +namespace se = ::streamexecutor; + +class MockPlatformDevice : public se::PlatformDevice { +public: + ~MockPlatformDevice() override {} + + std::string getName() const override { return "MockPlatformDevice"; } + + se::Expected> + createStream() override { + return se::make_error("not implemented"); + } + + se::Expected + allocateDeviceMemory(size_t ByteCount) override { + return se::GlobalDeviceMemoryBase(std::malloc(ByteCount)); + } + + se::Error freeDeviceMemory(se::GlobalDeviceMemoryBase Memory) override { + std::free(const_cast(Memory.getHandle())); + return se::Error::success(); + } + + se::Expected allocateHostMemory(size_t ByteCount) override { + return std::malloc(ByteCount); + } + + se::Error freeHostMemory(void *Memory) override { + std::free(Memory); + return se::Error::success(); + } + + se::Error registerHostMemory(void *, size_t) override { + return se::Error::success(); + } + + se::Error unregisterHostMemory(void *) override { + return se::Error::success(); + } + + se::Error synchronousCopyD2H(const se::GlobalDeviceMemoryBase &DeviceSrc, + size_t SrcByteOffset, void *HostDst, + size_t DstByteOffset, + size_t ByteCount) override { + std::memcpy(static_cast(HostDst) + DstByteOffset, + static_cast(DeviceSrc.getHandle()) + + SrcByteOffset, + ByteCount); + return se::Error::success(); + } + + se::Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset, + se::GlobalDeviceMemoryBase DeviceDst, + size_t DstByteOffset, + size_t ByteCount) override { + std::memcpy(static_cast(const_cast(DeviceDst.getHandle())) + + DstByteOffset, + static_cast(HostSrc) + SrcByteOffset, ByteCount); + return se::Error::success(); + } + + se::Error synchronousCopyD2D(se::GlobalDeviceMemoryBase DeviceDst, + size_t DstByteOffset, + const se::GlobalDeviceMemoryBase &DeviceSrc, + size_t SrcByteOffset, + size_t ByteCount) override { + std::memcpy(static_cast(const_cast(DeviceDst.getHandle())) + + DstByteOffset, + static_cast(DeviceSrc.getHandle()) + + SrcByteOffset, + ByteCount); + return se::Error::success(); + } +}; + +/// Test fixture to hold objects used by tests. +class DeviceTest : public ::testing::Test { +public: + DeviceTest() + : HostA5{0, 1, 2, 3, 4}, HostB5{5, 6, 7, 8, 9}, + HostA7{10, 11, 12, 13, 14, 15, 16}, HostB7{17, 18, 19, 20, 21, 22, 23}, + DeviceA5(se::GlobalDeviceMemory::makeFromElementCount(HostA5, 5)), + DeviceB5(se::GlobalDeviceMemory::makeFromElementCount(HostB5, 5)), + DeviceA7(se::GlobalDeviceMemory::makeFromElementCount(HostA7, 7)), + DeviceB7(se::GlobalDeviceMemory::makeFromElementCount(HostB7, 7)), + Host5{24, 25, 26, 27, 28}, Host7{29, 30, 31, 32, 33, 34, 35}, + Device(&PDevice) {} + + // Device memory is backed by host arrays. + int HostA5[5]; + int HostB5[5]; + int HostA7[7]; + int HostB7[7]; + se::GlobalDeviceMemory DeviceA5; + se::GlobalDeviceMemory DeviceB5; + se::GlobalDeviceMemory DeviceA7; + se::GlobalDeviceMemory DeviceB7; + + // Host memory to be used as actual host memory. + int Host5[5]; + int Host7[7]; + + MockPlatformDevice PDevice; + se::Device Device; +}; + +#define EXPECT_NO_ERROR(E) EXPECT_FALSE(static_cast(E)) +#define EXPECT_ERROR(E) \ + do { \ + se::Error E__ = E; \ + EXPECT_TRUE(static_cast(E__)); \ + consumeError(std::move(E__)); \ + } while (false) + +using llvm::ArrayRef; +using llvm::MutableArrayRef; + +TEST_F(DeviceTest, AllocateAndFreeDeviceMemory) { + se::Expected> MaybeMemory = + Device.allocateDeviceMemory(10); + EXPECT_TRUE(static_cast(MaybeMemory)); + EXPECT_NO_ERROR(Device.freeDeviceMemory(*MaybeMemory)); +} + +TEST_F(DeviceTest, AllocateAndFreeHostMemory) { + se::Expected MaybeMemory = Device.allocateHostMemory(10); + EXPECT_TRUE(static_cast(MaybeMemory)); + EXPECT_NO_ERROR(Device.freeHostMemory(*MaybeMemory)); +} + +TEST_F(DeviceTest, RegisterAndUnregisterHostMemory) { + std::vector Data(10); + EXPECT_NO_ERROR(Device.registerHostMemory(Data.data(), 10)); + EXPECT_NO_ERROR(Device.unregisterHostMemory(Data.data())); +} + +// D2H tests + +TEST_F(DeviceTest, SyncCopyD2HToMutableArrayRefByCount) { + EXPECT_NO_ERROR( + Device.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host5), 5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_NO_ERROR( + Device.synchronousCopyD2H(DeviceB5, MutableArrayRef(Host5), 2)); + for (int I = 0; I < 2; ++I) { + EXPECT_EQ(HostB5[I], Host5[I]); + } + + EXPECT_ERROR( + Device.synchronousCopyD2H(DeviceA7, MutableArrayRef(Host5), 7)); + + EXPECT_ERROR( + Device.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host7), 7)); + + EXPECT_ERROR( + Device.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host5), 7)); +} + +TEST_F(DeviceTest, SyncCopyD2HToMutableArrayRef) { + EXPECT_NO_ERROR( + Device.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host5))); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_ERROR( + Device.synchronousCopyD2H(DeviceA7, MutableArrayRef(Host5))); + + EXPECT_ERROR( + Device.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host7))); +} + +TEST_F(DeviceTest, SyncCopyD2HToPointer) { + EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5, Host5, 5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5, Host7, 7)); +} + +TEST_F(DeviceTest, SyncCopyD2HSliceToMutableArrayRefByCount) { + EXPECT_NO_ERROR(Device.synchronousCopyD2H( + DeviceA5.asSlice().drop_front(1), MutableArrayRef(Host5 + 1, 4), 4)); + for (int I = 1; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceB5.asSlice().drop_back(1), + MutableArrayRef(Host5), 2)); + for (int I = 0; I < 2; ++I) { + EXPECT_EQ(HostB5[I], Host5[I]); + } + + EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice(), + MutableArrayRef(Host5), 7)); + + EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), + MutableArrayRef(Host7), 7)); + + EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), + MutableArrayRef(Host5), 7)); +} + +TEST_F(DeviceTest, SyncCopyD2HSliceToMutableArrayRef) { + EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice().slice(1, 5), + MutableArrayRef(Host5))); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA7[I + 1], Host5[I]); + } + + EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice().drop_back(1), + MutableArrayRef(Host5))); + + EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), + MutableArrayRef(Host7))); +} + +TEST_F(DeviceTest, SyncCopyD2HSliceToPointer) { + EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice().drop_front(1), + Host5 + 1, 4)); + for (int I = 1; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), Host7, 7)); +} + +// H2D tests + +TEST_F(DeviceTest, SyncCopyH2DToArrayRefByCount) { + EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef(Host5), DeviceA5, 5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef(Host5), DeviceB5, 2)); + for (int I = 0; I < 2; ++I) { + EXPECT_EQ(HostB5[I], Host5[I]); + } + + EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef(Host7), DeviceA5, 7)); + + EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef(Host5), DeviceA7, 7)); + + EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef(Host5), DeviceA5, 7)); +} + +TEST_F(DeviceTest, SyncCopyH2DToArrayRef) { + EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef(Host5), DeviceA5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef(Host5), DeviceA7)); + + EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef(Host7), DeviceA5)); +} + +TEST_F(DeviceTest, SyncCopyH2DToPointer) { + EXPECT_NO_ERROR(Device.synchronousCopyH2D(Host5, DeviceA5, 5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_ERROR(Device.synchronousCopyH2D(Host7, DeviceA5, 7)); +} + +TEST_F(DeviceTest, SyncCopyH2DSliceToArrayRefByCount) { + EXPECT_NO_ERROR(Device.synchronousCopyH2D( + ArrayRef(Host5 + 1, 4), DeviceA5.asSlice().drop_front(1), 4)); + for (int I = 1; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_NO_ERROR(Device.synchronousCopyH2D( + ArrayRef(Host5), DeviceB5.asSlice().drop_back(1), 2)); + for (int I = 0; I < 2; ++I) { + EXPECT_EQ(HostB5[I], Host5[I]); + } + + EXPECT_ERROR( + Device.synchronousCopyH2D(ArrayRef(Host7), DeviceA5.asSlice(), 7)); + + EXPECT_ERROR( + Device.synchronousCopyH2D(ArrayRef(Host5), DeviceA7.asSlice(), 7)); + + EXPECT_ERROR( + Device.synchronousCopyH2D(ArrayRef(Host5), DeviceA5.asSlice(), 7)); +} + +TEST_F(DeviceTest, SyncCopyH2DSliceToArrayRef) { + EXPECT_NO_ERROR( + Device.synchronousCopyH2D(ArrayRef(Host5), DeviceA5.asSlice())); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_ERROR( + Device.synchronousCopyH2D(ArrayRef(Host5), DeviceA7.asSlice())); + + EXPECT_ERROR( + Device.synchronousCopyH2D(ArrayRef(Host7), DeviceA5.asSlice())); +} + +TEST_F(DeviceTest, SyncCopyH2DSliceToPointer) { + EXPECT_NO_ERROR(Device.synchronousCopyH2D(Host5, DeviceA5.asSlice(), 5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], Host5[I]); + } + + EXPECT_ERROR(Device.synchronousCopyH2D(Host7, DeviceA5.asSlice(), 7)); +} + +// D2D tests + +TEST_F(DeviceTest, SyncCopyD2DByCount) { + EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5, 5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], HostB5[I]); + } + + EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB7, 2)); + for (int I = 0; I < 2; ++I) { + EXPECT_EQ(HostA7[I], HostB7[I]); + } + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5, 7)); + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5, 7)); + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7, 7)); +} + +TEST_F(DeviceTest, SyncCopyD2D) { + EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], HostB5[I]); + } + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5)); + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7)); +} + +TEST_F(DeviceTest, SyncCopySliceD2DByCount) { + EXPECT_NO_ERROR( + Device.synchronousCopyD2D(DeviceA5.asSlice().drop_front(1), DeviceB5, 4)); + for (int I = 0; I < 4; ++I) { + EXPECT_EQ(HostA5[I + 1], HostB5[I]); + } + + EXPECT_NO_ERROR( + Device.synchronousCopyD2D(DeviceA7.asSlice().drop_back(1), DeviceB7, 2)); + for (int I = 0; I < 2; ++I) { + EXPECT_EQ(HostA7[I], HostB7[I]); + } + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5, 7)); + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5, 7)); + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7, 7)); +} + +TEST_F(DeviceTest, SyncCopySliceD2D) { + EXPECT_NO_ERROR( + Device.synchronousCopyD2D(DeviceA7.asSlice().drop_back(2), DeviceB5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA7[I], HostB5[I]); + } + + EXPECT_ERROR( + Device.synchronousCopyD2D(DeviceA7.asSlice().drop_front(1), DeviceB5)); + + EXPECT_ERROR( + Device.synchronousCopyD2D(DeviceA5.asSlice().drop_back(1), DeviceB7)); +} + +TEST_F(DeviceTest, SyncCopyD2DSliceByCount) { + EXPECT_NO_ERROR( + Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_front(2), 5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], HostB7[I + 2]); + } + + EXPECT_NO_ERROR( + Device.synchronousCopyD2D(DeviceA7, DeviceB7.asSlice().drop_back(3), 2)); + for (int I = 0; I < 2; ++I) { + EXPECT_EQ(HostA7[I], HostB7[I]); + } + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5.asSlice(), 7)); + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice(), 7)); + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice(), 7)); +} + +TEST_F(DeviceTest, SyncCopyD2DSlice) { + EXPECT_NO_ERROR( + Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_back(2))); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], HostB7[I]); + } + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice())); + + EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice())); +} + +TEST_F(DeviceTest, SyncCopySliceD2DSliceByCount) { + EXPECT_NO_ERROR( + Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 5)); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], HostB5[I]); + } + + EXPECT_NO_ERROR( + Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB7.asSlice(), 2)); + for (int I = 0; I < 2; ++I) { + EXPECT_EQ(HostA7[I], HostB7[I]); + } + + EXPECT_ERROR( + Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 7)); + + EXPECT_ERROR( + Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice(), 7)); + + EXPECT_ERROR( + Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice(), 7)); +} + +TEST_F(DeviceTest, SyncCopySliceD2DSlice) { + EXPECT_NO_ERROR( + Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice())); + for (int I = 0; I < 5; ++I) { + EXPECT_EQ(HostA5[I], HostB5[I]); + } + + EXPECT_ERROR( + Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice())); + + EXPECT_ERROR( + Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice())); +} + +} // namespace diff --git a/parallel-libs/streamexecutor/lib/unittests/ExecutorTest.cpp b/parallel-libs/streamexecutor/lib/unittests/ExecutorTest.cpp deleted file mode 100644 index b6719d3..0000000 --- a/parallel-libs/streamexecutor/lib/unittests/ExecutorTest.cpp +++ /dev/null @@ -1,478 +0,0 @@ -//===-- ExecutorTest.cpp - Tests for Executor -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains the unit tests for Executor code. -/// -//===----------------------------------------------------------------------===// - -#include -#include - -#include "streamexecutor/Executor.h" -#include "streamexecutor/PlatformInterfaces.h" - -#include "gtest/gtest.h" - -namespace { - -namespace se = ::streamexecutor; - -class MockPlatformExecutor : public se::PlatformExecutor { -public: - ~MockPlatformExecutor() override {} - - std::string getName() const override { return "MockPlatformExecutor"; } - - se::Expected> - createStream() override { - return se::make_error("not implemented"); - } - - se::Expected - allocateDeviceMemory(size_t ByteCount) override { - return se::GlobalDeviceMemoryBase(std::malloc(ByteCount)); - } - - se::Error freeDeviceMemory(se::GlobalDeviceMemoryBase Memory) override { - std::free(const_cast(Memory.getHandle())); - return se::Error::success(); - } - - se::Expected allocateHostMemory(size_t ByteCount) override { - return std::malloc(ByteCount); - } - - se::Error freeHostMemory(void *Memory) override { - std::free(Memory); - return se::Error::success(); - } - - se::Error registerHostMemory(void *, size_t) override { - return se::Error::success(); - } - - se::Error unregisterHostMemory(void *) override { - return se::Error::success(); - } - - se::Error synchronousCopyD2H(const se::GlobalDeviceMemoryBase &DeviceSrc, - size_t SrcByteOffset, void *HostDst, - size_t DstByteOffset, - size_t ByteCount) override { - std::memcpy(static_cast(HostDst) + DstByteOffset, - static_cast(DeviceSrc.getHandle()) + - SrcByteOffset, - ByteCount); - return se::Error::success(); - } - - se::Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset, - se::GlobalDeviceMemoryBase DeviceDst, - size_t DstByteOffset, - size_t ByteCount) override { - std::memcpy(static_cast(const_cast(DeviceDst.getHandle())) + - DstByteOffset, - static_cast(HostSrc) + SrcByteOffset, ByteCount); - return se::Error::success(); - } - - se::Error synchronousCopyD2D(se::GlobalDeviceMemoryBase DeviceDst, - size_t DstByteOffset, - const se::GlobalDeviceMemoryBase &DeviceSrc, - size_t SrcByteOffset, - size_t ByteCount) override { - std::memcpy(static_cast(const_cast(DeviceDst.getHandle())) + - DstByteOffset, - static_cast(DeviceSrc.getHandle()) + - SrcByteOffset, - ByteCount); - return se::Error::success(); - } -}; - -/// Test fixture to hold objects used by tests. -class ExecutorTest : public ::testing::Test { -public: - ExecutorTest() - : HostA5{0, 1, 2, 3, 4}, HostB5{5, 6, 7, 8, 9}, - HostA7{10, 11, 12, 13, 14, 15, 16}, HostB7{17, 18, 19, 20, 21, 22, 23}, - DeviceA5(se::GlobalDeviceMemory::makeFromElementCount(HostA5, 5)), - DeviceB5(se::GlobalDeviceMemory::makeFromElementCount(HostB5, 5)), - DeviceA7(se::GlobalDeviceMemory::makeFromElementCount(HostA7, 7)), - DeviceB7(se::GlobalDeviceMemory::makeFromElementCount(HostB7, 7)), - Host5{24, 25, 26, 27, 28}, Host7{29, 30, 31, 32, 33, 34, 35}, - Executor(&PExecutor) {} - - // Device memory is backed by host arrays. - int HostA5[5]; - int HostB5[5]; - int HostA7[7]; - int HostB7[7]; - se::GlobalDeviceMemory DeviceA5; - se::GlobalDeviceMemory DeviceB5; - se::GlobalDeviceMemory DeviceA7; - se::GlobalDeviceMemory DeviceB7; - - // Host memory to be used as actual host memory. - int Host5[5]; - int Host7[7]; - - MockPlatformExecutor PExecutor; - se::Executor Executor; -}; - -#define EXPECT_NO_ERROR(E) EXPECT_FALSE(static_cast(E)) -#define EXPECT_ERROR(E) \ - do { \ - se::Error E__ = E; \ - EXPECT_TRUE(static_cast(E__)); \ - consumeError(std::move(E__)); \ - } while (false) - -using llvm::ArrayRef; -using llvm::MutableArrayRef; - -TEST_F(ExecutorTest, AllocateAndFreeDeviceMemory) { - se::Expected> MaybeMemory = - Executor.allocateDeviceMemory(10); - EXPECT_TRUE(static_cast(MaybeMemory)); - EXPECT_NO_ERROR(Executor.freeDeviceMemory(*MaybeMemory)); -} - -TEST_F(ExecutorTest, AllocateAndFreeHostMemory) { - se::Expected MaybeMemory = Executor.allocateHostMemory(10); - EXPECT_TRUE(static_cast(MaybeMemory)); - EXPECT_NO_ERROR(Executor.freeHostMemory(*MaybeMemory)); -} - -TEST_F(ExecutorTest, RegisterAndUnregisterHostMemory) { - std::vector Data(10); - EXPECT_NO_ERROR(Executor.registerHostMemory(Data.data(), 10)); - EXPECT_NO_ERROR(Executor.unregisterHostMemory(Data.data())); -} - -// D2H tests - -TEST_F(ExecutorTest, SyncCopyD2HToMutableArrayRefByCount) { - EXPECT_NO_ERROR( - Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host5), 5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_NO_ERROR( - Executor.synchronousCopyD2H(DeviceB5, MutableArrayRef(Host5), 2)); - for (int I = 0; I < 2; ++I) { - EXPECT_EQ(HostB5[I], Host5[I]); - } - - EXPECT_ERROR( - Executor.synchronousCopyD2H(DeviceA7, MutableArrayRef(Host5), 7)); - - EXPECT_ERROR( - Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host7), 7)); - - EXPECT_ERROR( - Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host5), 7)); -} - -TEST_F(ExecutorTest, SyncCopyD2HToMutableArrayRef) { - EXPECT_NO_ERROR( - Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host5))); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_ERROR( - Executor.synchronousCopyD2H(DeviceA7, MutableArrayRef(Host5))); - - EXPECT_ERROR( - Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef(Host7))); -} - -TEST_F(ExecutorTest, SyncCopyD2HToPointer) { - EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceA5, Host5, 5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5, Host7, 7)); -} - -TEST_F(ExecutorTest, SyncCopyD2HSliceToMutableArrayRefByCount) { - EXPECT_NO_ERROR(Executor.synchronousCopyD2H( - DeviceA5.asSlice().drop_front(1), MutableArrayRef(Host5 + 1, 4), 4)); - for (int I = 1; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceB5.asSlice().drop_back(1), - MutableArrayRef(Host5), 2)); - for (int I = 0; I < 2; ++I) { - EXPECT_EQ(HostB5[I], Host5[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA7.asSlice(), - MutableArrayRef(Host5), 7)); - - EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(), - MutableArrayRef(Host7), 7)); - - EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(), - MutableArrayRef(Host5), 7)); -} - -TEST_F(ExecutorTest, SyncCopyD2HSliceToMutableArrayRef) { - EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceA7.asSlice().slice(1, 5), - MutableArrayRef(Host5))); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA7[I + 1], Host5[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA7.asSlice().drop_back(1), - MutableArrayRef(Host5))); - - EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(), - MutableArrayRef(Host7))); -} - -TEST_F(ExecutorTest, SyncCopyD2HSliceToPointer) { - EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice().drop_front(1), - Host5 + 1, 4)); - for (int I = 1; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(), Host7, 7)); -} - -// H2D tests - -TEST_F(ExecutorTest, SyncCopyH2DToArrayRefByCount) { - EXPECT_NO_ERROR( - Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceA5, 5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_NO_ERROR( - Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceB5, 2)); - for (int I = 0; I < 2; ++I) { - EXPECT_EQ(HostB5[I], Host5[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef(Host7), DeviceA5, 7)); - - EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceA7, 7)); - - EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceA5, 7)); -} - -TEST_F(ExecutorTest, SyncCopyH2DToArrayRef) { - EXPECT_NO_ERROR(Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceA5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceA7)); - - EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef(Host7), DeviceA5)); -} - -TEST_F(ExecutorTest, SyncCopyH2DToPointer) { - EXPECT_NO_ERROR(Executor.synchronousCopyH2D(Host5, DeviceA5, 5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyH2D(Host7, DeviceA5, 7)); -} - -TEST_F(ExecutorTest, SyncCopyH2DSliceToArrayRefByCount) { - EXPECT_NO_ERROR(Executor.synchronousCopyH2D( - ArrayRef(Host5 + 1, 4), DeviceA5.asSlice().drop_front(1), 4)); - for (int I = 1; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_NO_ERROR(Executor.synchronousCopyH2D( - ArrayRef(Host5), DeviceB5.asSlice().drop_back(1), 2)); - for (int I = 0; I < 2; ++I) { - EXPECT_EQ(HostB5[I], Host5[I]); - } - - EXPECT_ERROR( - Executor.synchronousCopyH2D(ArrayRef(Host7), DeviceA5.asSlice(), 7)); - - EXPECT_ERROR( - Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceA7.asSlice(), 7)); - - EXPECT_ERROR( - Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceA5.asSlice(), 7)); -} - -TEST_F(ExecutorTest, SyncCopyH2DSliceToArrayRef) { - EXPECT_NO_ERROR( - Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceA5.asSlice())); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_ERROR( - Executor.synchronousCopyH2D(ArrayRef(Host5), DeviceA7.asSlice())); - - EXPECT_ERROR( - Executor.synchronousCopyH2D(ArrayRef(Host7), DeviceA5.asSlice())); -} - -TEST_F(ExecutorTest, SyncCopyH2DSliceToPointer) { - EXPECT_NO_ERROR(Executor.synchronousCopyH2D(Host5, DeviceA5.asSlice(), 5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], Host5[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyH2D(Host7, DeviceA5.asSlice(), 7)); -} - -// D2D tests - -TEST_F(ExecutorTest, SyncCopyD2DByCount) { - EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5, 5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], HostB5[I]); - } - - EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB7, 2)); - for (int I = 0; I < 2; ++I) { - EXPECT_EQ(HostA7[I], HostB7[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5, 7)); - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5, 7)); - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7, 7)); -} - -TEST_F(ExecutorTest, SyncCopyD2D) { - EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], HostB5[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5)); - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7)); -} - -TEST_F(ExecutorTest, SyncCopySliceD2DByCount) { - EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA5.asSlice().drop_front(1), - DeviceB5, 4)); - for (int I = 0; I < 4; ++I) { - EXPECT_EQ(HostA5[I + 1], HostB5[I]); - } - - EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA7.asSlice().drop_back(1), - DeviceB7, 2)); - for (int I = 0; I < 2; ++I) { - EXPECT_EQ(HostA7[I], HostB7[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5, 7)); - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5, 7)); - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7, 7)); -} - -TEST_F(ExecutorTest, SyncCopySliceD2D) { - EXPECT_NO_ERROR( - Executor.synchronousCopyD2D(DeviceA7.asSlice().drop_back(2), DeviceB5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA7[I], HostB5[I]); - } - - EXPECT_ERROR( - Executor.synchronousCopyD2D(DeviceA7.asSlice().drop_front(1), DeviceB5)); - - EXPECT_ERROR( - Executor.synchronousCopyD2D(DeviceA5.asSlice().drop_back(1), DeviceB7)); -} - -TEST_F(ExecutorTest, SyncCopyD2DSliceByCount) { - EXPECT_NO_ERROR(Executor.synchronousCopyD2D( - DeviceA5, DeviceB7.asSlice().drop_front(2), 5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], HostB7[I + 2]); - } - - EXPECT_NO_ERROR(Executor.synchronousCopyD2D( - DeviceA7, DeviceB7.asSlice().drop_back(3), 2)); - for (int I = 0; I < 2; ++I) { - EXPECT_EQ(HostA7[I], HostB7[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5.asSlice(), 7)); - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice(), 7)); - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice(), 7)); -} - -TEST_F(ExecutorTest, SyncCopyD2DSlice) { - EXPECT_NO_ERROR( - Executor.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_back(2))); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], HostB7[I]); - } - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice())); - - EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice())); -} - -TEST_F(ExecutorTest, SyncCopySliceD2DSliceByCount) { - EXPECT_NO_ERROR( - Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 5)); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], HostB5[I]); - } - - EXPECT_NO_ERROR( - Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB7.asSlice(), 2)); - for (int I = 0; I < 2; ++I) { - EXPECT_EQ(HostA7[I], HostB7[I]); - } - - EXPECT_ERROR( - Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 7)); - - EXPECT_ERROR( - Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice(), 7)); - - EXPECT_ERROR( - Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice(), 7)); -} - -TEST_F(ExecutorTest, SyncCopySliceD2DSlice) { - EXPECT_NO_ERROR( - Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice())); - for (int I = 0; I < 5; ++I) { - EXPECT_EQ(HostA5[I], HostB5[I]); - } - - EXPECT_ERROR( - Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice())); - - EXPECT_ERROR( - Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice())); -} - -} // namespace diff --git a/parallel-libs/streamexecutor/lib/unittests/KernelTest.cpp b/parallel-libs/streamexecutor/lib/unittests/KernelTest.cpp index b5ee8a0..a19ebfb9 100644 --- a/parallel-libs/streamexecutor/lib/unittests/KernelTest.cpp +++ b/parallel-libs/streamexecutor/lib/unittests/KernelTest.cpp @@ -14,7 +14,7 @@ #include -#include "streamexecutor/Executor.h" +#include "streamexecutor/Device.h" #include "streamexecutor/Kernel.h" #include "streamexecutor/KernelSpec.h" #include "streamexecutor/PlatformInterfaces.h" @@ -27,7 +27,7 @@ namespace { namespace se = ::streamexecutor; -// An Executor that returns a dummy KernelInterface. +// A Device that returns a dummy KernelInterface. // // During construction it creates a unique_ptr to a dummy KernelInterface and it // also stores a separate copy of the raw pointer that is stored by that @@ -39,10 +39,10 @@ namespace se = ::streamexecutor; // object. The raw pointer copy can then be used to identify the unique_ptr in // its new location (by comparing the raw pointer with unique_ptr::get), to // verify that the unique_ptr ended up where it was supposed to be. -class MockExecutor : public se::Executor { +class MockDevice : public se::Device { public: - MockExecutor() - : se::Executor(nullptr), Unique(llvm::make_unique()), + MockDevice() + : se::Device(nullptr), Unique(llvm::make_unique()), Raw(Unique.get()) {} // Moves the unique pointer into the returned se::Expected instance. @@ -51,7 +51,7 @@ public: // out. se::Expected> getKernelImplementation(const se::MultiKernelLoaderSpec &) override { - assert(Unique && "MockExecutor getKernelImplementation should not be " + assert(Unique && "MockDevice getKernelImplementation should not be " "called more than once"); return std::move(Unique); } @@ -79,15 +79,15 @@ TYPED_TEST_CASE(GetImplementationTest, GetImplementationTypes); // Tests that the kernel create functions properly fetch the implementation // pointers for the kernel objects they construct from the passed-in -// Executor objects. +// Device objects. TYPED_TEST(GetImplementationTest, SetImplementationDuringCreate) { se::MultiKernelLoaderSpec Spec; - MockExecutor MockExecutor; + MockDevice Dev; - auto MaybeKernel = TypeParam::create(&MockExecutor, Spec); + auto MaybeKernel = TypeParam::create(&Dev, Spec); EXPECT_TRUE(static_cast(MaybeKernel)); se::KernelInterface *Implementation = MaybeKernel->getImplementation(); - EXPECT_EQ(MockExecutor.getRaw(), Implementation); + EXPECT_EQ(Dev.getRaw(), Implementation); } } // namespace diff --git a/parallel-libs/streamexecutor/lib/unittests/StreamTest.cpp b/parallel-libs/streamexecutor/lib/unittests/StreamTest.cpp index 7564670..d05c928 100644 --- a/parallel-libs/streamexecutor/lib/unittests/StreamTest.cpp +++ b/parallel-libs/streamexecutor/lib/unittests/StreamTest.cpp @@ -14,7 +14,7 @@ #include -#include "streamexecutor/Executor.h" +#include "streamexecutor/Device.h" #include "streamexecutor/Kernel.h" #include "streamexecutor/KernelSpec.h" #include "streamexecutor/PlatformInterfaces.h" @@ -26,14 +26,14 @@ namespace { namespace se = ::streamexecutor; -/// Mock PlatformExecutor that performs asynchronous memcpy operations by +/// Mock PlatformDevice that performs asynchronous memcpy operations by /// ignoring the stream argument and calling std::memcpy on device memory /// handles. -class MockPlatformExecutor : public se::PlatformExecutor { +class MockPlatformDevice : public se::PlatformDevice { public: - ~MockPlatformExecutor() override {} + ~MockPlatformDevice() override {} - std::string getName() const override { return "MockPlatformExecutor"; } + std::string getName() const override { return "MockPlatformDevice"; } se::Expected> createStream() override { @@ -83,7 +83,7 @@ public: DeviceA7(se::GlobalDeviceMemory::makeFromElementCount(HostA7, 7)), DeviceB7(se::GlobalDeviceMemory::makeFromElementCount(HostB7, 7)), Host5{24, 25, 26, 27, 28}, Host7{29, 30, 31, 32, 33, 34, 35}, - Stream(llvm::make_unique(&PExecutor)) {} + Stream(llvm::make_unique(&PDevice)) {} protected: // Device memory is backed by host arrays. @@ -100,7 +100,7 @@ protected: int Host5[5]; int Host7[7]; - MockPlatformExecutor PExecutor; + MockPlatformDevice PDevice; se::Stream Stream; }; -- 2.7.4