--- /dev/null
+BasedOnStyle: LLVM
--- /dev/null
+Checks: '-*,clang-diagnostic-*,llvm-*,-llvm-header-guard,misc-*,-misc-unused-parameters,readability-identifier-naming'
+CheckOptions:
+ - key: readability-identifier-naming.ClassCase
+ value: CamelCase
+ - key: readability-identifier-naming.EnumCase
+ value: CamelCase
+ - key: readability-identifier-naming.FunctionCase
+ value: camelBack
+ - key: readability-identifier-naming.MemberCase
+ value: CamelCase
+ - key: readability-identifier-naming.ParameterCase
+ value: CamelCase
+ - key: readability-identifier-naming.UnionCase
+ value: CamelCase
+ - key: readability-identifier-naming.VariableCase
+ value: CamelCase
+
--- /dev/null
+cmake_minimum_required(VERSION 3.1)
+
+option(ACXXEL_ENABLE_UNIT_TESTS "enable acxxel unit tests" ON)
+option(ACXXEL_ENABLE_EXAMPLES "enable acxxel examples" OFF)
+option(ACXXEL_ENABLE_DOXYGEN "enable Doxygen for acxxel" OFF)
+option(ACXXEL_ENABLE_CUDA "enable CUDA for acxxel" ON)
+option(ACXXEL_ENABLE_OPENCL "enable OpenCL for acxxel" ON)
+
+project(acxxel)
+
+if(ACXXEL_ENABLE_CUDA)
+find_package(CUDA REQUIRED)
+include_directories(${CUDA_INCLUDE_DIRS})
+find_library(CUDA_DRIVER_LIBRARY cuda)
+if(NOT CUDA_DRIVER_LIBRARY)
+ message(FATAL_ERROR "could not find libcuda, is the CUDA driver installed on your system?")
+endif(NOT CUDA_DRIVER_LIBRARY)
+set(ACXXEL_CUDA_SOURCES cuda_acxxel.cpp)
+set(ACXXEL_CUDA_LIBRARIES ${CUDA_DRIVER_LIBRARY} ${CUDA_LIBRARIES})
+endif(ACXXEL_ENABLE_CUDA)
+
+if(ACXXEL_ENABLE_OPENCL)
+find_package(OpenCL REQUIRED)
+include_directories(${OpenCL_INCLUDE_DIRS})
+set(ACXXEL_OPENCL_SOURCES opencl_acxxel.cpp)
+set(ACXXEL_OPENCL_LIBRARIES ${OpenCL_LIBRARIES})
+endif()
+
+configure_file(config.h.in config.h)
+include_directories(${CMAKE_CURRENT_BINARY_DIR})
+
+# Insist on C++ 11 features.
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Add warning flags.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-missing-braces")
+
+add_library(
+ acxxel
+ acxxel.cpp
+ ${ACXXEL_CUDA_SOURCES}
+ ${ACXXEL_OPENCL_SOURCES})
+target_link_libraries(
+ acxxel
+ ${ACXXEL_CUDA_LIBRARIES}
+ ${ACXXEL_OPENCL_LIBRARIES})
+
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+
+if(ACXXEL_ENABLE_EXAMPLES)
+ add_subdirectory(examples)
+endif()
+
+if(ACXXEL_ENABLE_UNIT_TESTS)
+ enable_testing()
+ find_package(GTest REQUIRED)
+ include_directories(${GTEST_INCLUDE_DIRS})
+ find_package(Threads)
+ add_subdirectory(tests)
+endif()
+
+if(ACXXEL_ENABLE_DOXYGEN)
+ find_package(Doxygen REQUIRED)
+ configure_file(Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)
+ add_custom_target(
+ acxxel-doc
+ ${DOXYGEN_EXECUTABLE}
+ ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
+ WORKING_DIRECTORY
+ ${CMAKE_CURRENT_BINARY_DIR}
+ COMMENT
+ "Generating acxxel API documentation with Doxygen"
+ VERBATIM)
+endif()
--- /dev/null
+# Doxyfile 1.8.6
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project.
+#
+# All text after a double hash (##) is considered a comment and is placed in
+# front of the TAG it is preceding.
+#
+# All text after a single hash (#) is considered a comment and will be ignored.
+# The format is:
+# TAG = value [value, ...]
+# For lists, items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (\" \").
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all text
+# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv
+# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv
+# for the list of possible encodings.
+# The default value is: UTF-8.
+
+DOXYFILE_ENCODING = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by
+# double-quotes, unless you are using Doxywizard) that should identify the
+# project for which the documentation is generated. This name is used in the
+# title of most generated pages and in a few other places.
+# The default value is: My Project.
+
+PROJECT_NAME = "Acxxel"
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
+# could be handy for archiving the generated documentation or if some version
+# control system is used.
+
+PROJECT_NUMBER =
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer a
+# quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF =
+
+# With the PROJECT_LOGO tag one can specify an logo or icon that is included in
+# the documentation. The maximum height of the logo should not exceed 55 pixels
+# and the maximum width should not exceed 200 pixels. Doxygen will copy the logo
+# to the output directory.
+
+PROJECT_LOGO =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path
+# into which the generated documentation will be written. If a relative path is
+# entered, it will be relative to the location where doxygen was started. If
+# left blank the current directory will be used.
+
+OUTPUT_DIRECTORY =
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub-
+# directories (in 2 levels) under the output directory of each output format and
+# will distribute the generated files over these directories. Enabling this
+# option can be useful when feeding doxygen a huge amount of source files, where
+# putting all generated files in the same directory would otherwise causes
+# performance problems for the file system.
+# The default value is: NO.
+
+CREATE_SUBDIRS = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese,
+# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States),
+# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian,
+# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages),
+# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian,
+# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian,
+# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish,
+# Ukrainian and Vietnamese.
+# The default value is: English.
+
+OUTPUT_LANGUAGE = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member
+# descriptions after the members that are listed in the file and class
+# documentation (similar to Javadoc). Set to NO to disable this.
+# The default value is: YES.
+
+BRIEF_MEMBER_DESC = YES
+
+# If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief
+# description of a member or function before the detailed description
+#
+# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+# The default value is: YES.
+
+REPEAT_BRIEF = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator that is
+# used to form the text in various listings. Each string in this list, if found
+# as the leading text of the brief description, will be stripped from the text
+# and the result, after processing the whole list, is used as the annotated
+# text. Otherwise, the brief description is used as-is. If left blank, the
+# following values are used ($name is automatically replaced with the name of
+# the entity):The $name class, The $name widget, The $name file, is, provides,
+# specifies, contains, represents, a, an and the.
+
+ABBREVIATE_BRIEF =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# doxygen will generate a detailed section even if there is only a brief
+# description.
+# The default value is: NO.
+
+ALWAYS_DETAILED_SEC = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+# The default value is: NO.
+
+INLINE_INHERITED_MEMB = NO
+
+# If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path
+# before files name in the file list and in the header files. If set to NO the
+# shortest path that makes the file name unique will be used
+# The default value is: YES.
+
+FULL_PATH_NAMES = YES
+
+# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
+# Stripping is only done if one of the specified strings matches the left-hand
+# part of the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the path to
+# strip.
+#
+# Note that you can specify absolute paths here, but also relative paths, which
+# will be relative from the directory where doxygen is started.
+# This tag requires that the tag FULL_PATH_NAMES is set to YES.
+
+STRIP_FROM_PATH =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
+# path mentioned in the documentation of a class, which tells the reader which
+# header file to include in order to use a class. If left blank only the name of
+# the header file containing the class definition is used. Otherwise one should
+# specify the list of include paths that are normally passed to the compiler
+# using the -I flag.
+
+STRIP_FROM_INC_PATH =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
+# less readable) file names. This can be useful is your file systems doesn't
+# support long names like on DOS, Mac, or CD-ROM.
+# The default value is: NO.
+
+SHORT_NAMES = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the
+# first line (until the first dot) of a Javadoc-style comment as the brief
+# description. If set to NO, the Javadoc-style will behave just like regular Qt-
+# style comments (thus requiring an explicit @brief command for a brief
+# description.)
+# The default value is: NO.
+
+JAVADOC_AUTOBRIEF = Yes
+
+# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
+# line (until the first dot) of a Qt-style comment as the brief description. If
+# set to NO, the Qt-style will behave just like regular Qt-style comments (thus
+# requiring an explicit \brief command for a brief description.)
+# The default value is: NO.
+
+QT_AUTOBRIEF = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a
+# multi-line C++ special comment block (i.e. a block of //! or /// comments) as
+# a brief description. This used to be the default behavior. The new default is
+# to treat a multi-line C++ comment block as a detailed description. Set this
+# tag to YES if you prefer the old behavior instead.
+#
+# Note that setting this tag to YES also means that rational rose comments are
+# not recognized any more.
+# The default value is: NO.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
+# documentation from any documented member that it re-implements.
+# The default value is: YES.
+
+INHERIT_DOCS = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a
+# new page for each member. If set to NO, the documentation of a member will be
+# part of the file/class/namespace that contains it.
+# The default value is: NO.
+
+SEPARATE_MEMBER_PAGES = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen
+# uses this value to replace tabs by spaces in code fragments.
+# Minimum value: 1, maximum value: 16, default value: 4.
+
+TAB_SIZE = 4
+
+# This tag can be used to specify a number of aliases that act as commands in
+# the documentation. An alias has the form:
+# name=value
+# For example adding
+# "sideeffect=@par Side Effects:\n"
+# will allow you to put the command \sideeffect (or @sideeffect) in the
+# documentation, which will result in a user-defined paragraph with heading
+# "Side Effects:". You can put \n's in the value part of an alias to insert
+# newlines.
+
+ALIASES =
+
+# This tag can be used to specify a number of word-keyword mappings (TCL only).
+# A mapping has the form "name=value". For example adding "class=itcl::class"
+# will allow you to use the command class in the itcl::class meaning.
+
+TCL_SUBST =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
+# only. Doxygen will then generate output that is more tailored for C. For
+# instance, some of the names that are used will be different. The list of all
+# members will be omitted, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_FOR_C = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
+# Python sources only. Doxygen will then generate output that is more tailored
+# for that language. For instance, namespaces will be presented as packages,
+# qualified scopes will look different, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_JAVA = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources. Doxygen will then generate output that is tailored for Fortran.
+# The default value is: NO.
+
+OPTIMIZE_FOR_FORTRAN = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for VHDL.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_VHDL = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given
+# extension. Doxygen has a built-in mapping, but you can override or extend it
+# using this tag. The format is ext=language, where ext is a file extension, and
+# language is one of the parsers supported by doxygen: IDL, Java, Javascript,
+# C#, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL. For instance to make
+# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C
+# (default is Fortran), use: inc=Fortran f=C.
+#
+# Note For files without extension you can use no_extension as a placeholder.
+#
+# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
+# the files are not read by doxygen.
+
+EXTENSION_MAPPING =
+
+# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
+# according to the Markdown format, which allows for more readable
+# documentation. See http://daringfireball.net/projects/markdown/ for details.
+# The output of markdown processing is further processed by doxygen, so you can
+# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
+# case of backward compatibilities issues.
+# The default value is: YES.
+
+MARKDOWN_SUPPORT = YES
+
+# When enabled doxygen tries to link words that correspond to documented
+# classes, or namespaces to their corresponding documentation. Such a link can
+# be prevented in individual cases by by putting a % sign in front of the word
+# or globally by setting AUTOLINK_SUPPORT to NO.
+# The default value is: YES.
+
+AUTOLINK_SUPPORT = YES
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should set this
+# tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string);
+# versus func(std::string) {}). This also make the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+# The default value is: NO.
+
+BUILTIN_STL_SUPPORT = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+# The default value is: NO.
+
+CPP_CLI_SUPPORT = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
+# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen
+# will parse them like normal C++ but will assume all classes use public instead
+# of private inheritance when no explicit protection keyword is present.
+# The default value is: NO.
+
+SIP_SUPPORT = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate
+# getter and setter methods for a property. Setting this option to YES will make
+# doxygen to replace the get and set methods by a property in the documentation.
+# This will only work if the methods are indeed getting or setting a simple
+# type. If this is not the case, or you want to show the methods anyway, you
+# should set this option to NO.
+# The default value is: YES.
+
+IDL_PROPERTY_SUPPORT = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+# The default value is: NO.
+
+DISTRIBUTE_GROUP_DOC = NO
+
+# Set the SUBGROUPING tag to YES to allow class member groups of the same type
+# (for instance a group of public functions) to be put as a subgroup of that
+# type (e.g. under the Public Functions section). Set it to NO to prevent
+# subgrouping. Alternatively, this can be done per class using the
+# \nosubgrouping command.
+# The default value is: YES.
+
+SUBGROUPING = YES
+
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions
+# are shown inside the group in which they are included (e.g. using \ingroup)
+# instead of on a separate page (for HTML and Man pages) or section (for LaTeX
+# and RTF).
+#
+# Note that this feature does not work in combination with
+# SEPARATE_MEMBER_PAGES.
+# The default value is: NO.
+
+INLINE_GROUPED_CLASSES = NO
+
+# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions
+# with only public data fields or simple typedef fields will be shown inline in
+# the documentation of the scope in which they are defined (i.e. file,
+# namespace, or group documentation), provided this scope is documented. If set
+# to NO, structs, classes, and unions are shown on a separate page (for HTML and
+# Man pages) or section (for LaTeX and RTF).
+# The default value is: NO.
+
+INLINE_SIMPLE_STRUCTS = NO
+
+# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or
+# enum is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically be
+# useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+# The default value is: NO.
+
+TYPEDEF_HIDES_STRUCT = NO
+
+# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
+# cache is used to resolve symbols given their name and scope. Since this can be
+# an expensive process and often the same symbol appears multiple times in the
+# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
+# doxygen will become slower. If the cache is too large, memory is wasted. The
+# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
+# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
+# symbols. At the end of a run doxygen will report the cache usage and suggest
+# the optimal cache size from a speed point of view.
+# Minimum value: 0, maximum value: 9, default value: 0.
+
+LOOKUP_CACHE_SIZE = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available. Private
+# class members and static file members will be hidden unless the
+# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
+# Note: This will also disable the warnings about undocumented members that are
+# normally produced when WARNINGS is set to YES.
+# The default value is: NO.
+
+EXTRACT_ALL = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class will
+# be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIVATE = NO
+
+# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal
+# scope will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PACKAGE = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file will be
+# included in the documentation.
+# The default value is: NO.
+
+EXTRACT_STATIC = NO
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined
+# locally in source files will be included in the documentation. If set to NO
+# only classes defined in header files are included. Does not have any effect
+# for Java sources.
+# The default value is: YES.
+
+EXTRACT_LOCAL_CLASSES = YES
+
+# This flag is only useful for Objective-C code. When set to YES local methods,
+# which are defined in the implementation section but not in the interface are
+# included in the documentation. If set to NO only methods in the interface are
+# included.
+# The default value is: NO.
+
+EXTRACT_LOCAL_METHODS = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base name of
+# the file that contains the anonymous namespace. By default anonymous namespace
+# are hidden.
+# The default value is: NO.
+
+EXTRACT_ANON_NSPACES = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
+# undocumented members inside documented classes or files. If set to NO these
+# members will be included in the various overviews, but no documentation
+# section is generated. This option has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_MEMBERS = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy. If set
+# to NO these classes will be included in the various overviews. This option has
+# no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_CLASSES = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
+# (class|struct|union) declarations. If set to NO these declarations will be
+# included in the documentation.
+# The default value is: NO.
+
+HIDE_FRIEND_COMPOUNDS = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
+# documentation blocks found inside the body of a function. If set to NO these
+# blocks will be appended to the function's detailed documentation block.
+# The default value is: NO.
+
+HIDE_IN_BODY_DOCS = NO
+
+# The INTERNAL_DOCS tag determines if documentation that is typed after a
+# \internal command is included. If the tag is set to NO then the documentation
+# will be excluded. Set it to YES to include the internal documentation.
+# The default value is: NO.
+
+INTERNAL_DOCS = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
+# names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+# The default value is: system dependent.
+
+CASE_SENSE_NAMES = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
+# their full class and namespace scopes in the documentation. If set to YES the
+# scope will be hidden.
+# The default value is: NO.
+
+HIDE_SCOPE_NAMES = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
+# the files that are included by a file in the documentation of that file.
+# The default value is: YES.
+
+SHOW_INCLUDE_FILES = YES
+
+# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
+# grouped member an include statement to the documentation, telling the reader
+# which file to include in order to use the member.
+# The default value is: NO.
+
+SHOW_GROUPED_MEMB_INC = NO
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
+# files with double quotes in the documentation rather than with sharp brackets.
+# The default value is: NO.
+
+FORCE_LOCAL_INCLUDES = NO
+
+# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
+# documentation for inline members.
+# The default value is: YES.
+
+INLINE_INFO = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
+# (detailed) documentation of file and class members alphabetically by member
+# name. If set to NO the members will appear in declaration order.
+# The default value is: YES.
+
+SORT_MEMBER_DOCS = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
+# descriptions of file, namespace and class members alphabetically by member
+# name. If set to NO the members will appear in declaration order. Note that
+# this will also influence the order of the classes in the class list.
+# The default value is: NO.
+
+SORT_BRIEF_DOCS = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
+# (brief and detailed) documentation of class members so that constructors and
+# destructors are listed first. If set to NO the constructors will appear in the
+# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
+# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
+# member documentation.
+# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
+# detailed member documentation.
+# The default value is: NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
+# of group names into alphabetical order. If set to NO the group names will
+# appear in their defined order.
+# The default value is: NO.
+
+SORT_GROUP_NAMES = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
+# fully-qualified names, including namespaces. If set to NO, the class list will
+# be sorted only by class name, not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the alphabetical
+# list.
+# The default value is: NO.
+
+SORT_BY_SCOPE_NAME = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
+# type resolution of all parameters of a function it will reject a match between
+# the prototype and the implementation of a member function even if there is
+# only one candidate or it is obvious which candidate to choose by doing a
+# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
+# accept a match between prototype and implementation in such cases.
+# The default value is: NO.
+
+STRICT_PROTO_MATCHING = NO
+
+# The GENERATE_TODOLIST tag can be used to enable ( YES) or disable ( NO) the
+# todo list. This list is created by putting \todo commands in the
+# documentation.
+# The default value is: YES.
+
+GENERATE_TODOLIST = YES
+
+# The GENERATE_TESTLIST tag can be used to enable ( YES) or disable ( NO) the
+# test list. This list is created by putting \test commands in the
+# documentation.
+# The default value is: YES.
+
+GENERATE_TESTLIST = YES
+
+# The GENERATE_BUGLIST tag can be used to enable ( YES) or disable ( NO) the bug
+# list. This list is created by putting \bug commands in the documentation.
+# The default value is: YES.
+
+GENERATE_BUGLIST = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable ( YES) or disable ( NO)
+# the deprecated list. This list is created by putting \deprecated commands in
+# the documentation.
+# The default value is: YES.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional documentation
+# sections, marked by \if <section_label> ... \endif and \cond <section_label>
+# ... \endcond blocks.
+
+ENABLED_SECTIONS =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
+# initial value of a variable or macro / define can have for it to appear in the
+# documentation. If the initializer consists of more lines than specified here
+# it will be hidden. Use a value of 0 to hide initializers completely. The
+# appearance of the value of individual variables and macros / defines can be
+# controlled using \showinitializer or \hideinitializer command in the
+# documentation regardless of this setting.
+# Minimum value: 0, maximum value: 10000, default value: 30.
+
+MAX_INITIALIZER_LINES = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
+# the bottom of the documentation of classes and structs. If set to YES the list
+# will mention the files that were used to generate the documentation.
+# The default value is: YES.
+
+SHOW_USED_FILES = YES
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
+# will remove the Files entry from the Quick Index and from the Folder Tree View
+# (if specified).
+# The default value is: YES.
+
+SHOW_FILES = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
+# page. This will remove the Namespaces entry from the Quick Index and from the
+# Folder Tree View (if specified).
+# The default value is: YES.
+
+SHOW_NAMESPACES = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command command input-file, where command is the value of the
+# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided
+# by doxygen. Whatever the program writes to standard output is used as the file
+# version. For an example see the documentation.
+
+FILE_VERSION_FILTER =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option. You can
+# optionally specify a file name after the option, if omitted DoxygenLayout.xml
+# will be used as the name of the layout file.
+#
+# Note that if you run doxygen from a directory containing a file called
+# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
+# tag is left empty.
+
+LAYOUT_FILE =
+
+# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
+# the reference definitions. This must be a list of .bib files. The .bib
+# extension is automatically appended if omitted. This requires the bibtex tool
+# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info.
+# For LaTeX the style of the bibliography can be controlled using
+# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
+# search path. Do not use file names with spaces, bibtex cannot handle them. See
+# also \cite for info how to create references.
+
+CITE_BIB_FILES =
+
+#---------------------------------------------------------------------------
+# Configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated to
+# standard output by doxygen. If QUIET is set to YES this implies that the
+# messages are off.
+# The default value is: NO.
+
+QUIET = YES
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated to standard error ( stderr) by doxygen. If WARNINGS is set to YES
+# this implies that the warnings are on.
+#
+# Tip: Turn warnings on while writing the documentation.
+# The default value is: YES.
+
+WARNINGS = YES
+
+# If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate
+# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
+# will automatically be disabled.
+# The default value is: YES.
+
+WARN_IF_UNDOCUMENTED = YES
+
+# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some parameters
+# in a documented function, or documenting parameters that don't exist or using
+# markup commands wrongly.
+# The default value is: YES.
+
+WARN_IF_DOC_ERROR = YES
+
+# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
+# are documented, but have no documentation for their parameters or return
+# value. If set to NO doxygen will only warn about wrong or incomplete parameter
+# documentation, but not about the absence of documentation.
+# The default value is: NO.
+
+WARN_NO_PARAMDOC = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that doxygen
+# can produce. The string should contain the $file, $line, and $text tags, which
+# will be replaced by the file and line number from which the warning originated
+# and the warning text. Optionally the format may contain $version, which will
+# be replaced by the version of the file (if it could be obtained via
+# FILE_VERSION_FILTER)
+# The default value is: $file:$line: $text.
+
+WARN_FORMAT = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning and error
+# messages should be written. If left blank the output is written to standard
+# error (stderr).
+
+WARN_LOGFILE =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag is used to specify the files and/or directories that contain
+# documented source files. You may enter file names like myfile.cpp or
+# directories like /usr/src/myproject. Separate the files or directories with
+# spaces.
+# Note: If this tag is empty the current directory is searched.
+
+INPUT = @CMAKE_CURRENT_SOURCE_DIR@
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
+# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
+# documentation (see: http://www.gnu.org/software/libiconv) for the list of
+# possible encodings.
+# The default value is: UTF-8.
+
+INPUT_ENCODING = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
+# *.h) to filter out the source-files in the directories. If left blank the
+# following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii,
+# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp,
+# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown,
+# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf,
+# *.qsf, *.as and *.js.
+
+FILE_PATTERNS =
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories should
+# be searched for input files as well.
+# The default value is: NO.
+
+RECURSIVE = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+#
+# Note that relative paths are relative to the directory from which doxygen is
+# run.
+
+EXCLUDE =
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
+# from the input.
+# The default value is: NO.
+
+EXCLUDE_SYMLINKS = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories.
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories for example use the pattern */test/*
+
+EXCLUDE_PATTERNS = */examples/* */tools/* */unittests/* */.*
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories use the pattern */test/*
+
+EXCLUDE_SYMBOLS =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or directories
+# that contain example code fragments that are included (see the \include
+# command).
+
+EXAMPLE_PATH = @CMAKE_CURRENT_SOURCE_DIR@/examples
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
+# *.h) to filter out the source-files in the directories. If left blank all
+# files are included.
+
+EXAMPLE_PATTERNS =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude commands
+# irrespective of the value of the RECURSIVE tag.
+# The default value is: NO.
+
+EXAMPLE_RECURSIVE = YES
+
+# The IMAGE_PATH tag can be used to specify one or more files or directories
+# that contain images that are to be included in the documentation (see the
+# \image command).
+
+IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command:
+#
+# <filter> <input-file>
+#
+# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the
+# name of an input file. Doxygen will then use the output that the filter
+# program writes to standard output. If FILTER_PATTERNS is specified, this tag
+# will be ignored.
+#
+# Note that the filter must not add or remove lines; it is applied before the
+# code is scanned, but not when the output code is generated. If lines are added
+# or removed, the anchors will not be placed correctly.
+
+INPUT_FILTER =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form: pattern=filter
+# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
+# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
+# patterns match the file name, INPUT_FILTER is applied.
+
+FILTER_PATTERNS =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER ) will also be used to filter the input files that are used for
+# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).
+# The default value is: NO.
+
+FILTER_SOURCE_FILES = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and
+# it is also possible to disable source filtering for a specific pattern using
+# *.ext= (so without naming a filter).
+# This tag requires that the tag FILTER_SOURCE_FILES is set to YES.
+
+FILTER_SOURCE_PATTERNS =
+
+# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
+# is part of the input, its contents will be placed on the main page
+# (index.html). This can be useful if you have a project on for instance GitHub
+# and want to reuse the introduction page also for the doxygen output.
+
+USE_MDFILE_AS_MAINPAGE =
+
+#---------------------------------------------------------------------------
+# Configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will be
+# generated. Documented entities will be cross-referenced with these sources.
+#
+# Note: To get rid of all source code in the generated output, make sure that
+# also VERBATIM_HEADERS is set to NO.
+# The default value is: NO.
+
+SOURCE_BROWSER = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body of functions,
+# classes and enums directly into the documentation.
+# The default value is: NO.
+
+INLINE_SOURCES = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
+# special comment blocks from generated source code fragments. Normal C, C++ and
+# Fortran comments will always remain visible.
+# The default value is: YES.
+
+STRIP_CODE_COMMENTS = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES then for each documented
+# function all documented functions referencing it will be listed.
+# The default value is: NO.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES then for each documented function
+# all documented entities called/used by that function will be listed.
+# The default value is: NO.
+
+REFERENCES_RELATION = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
+# to YES, then the hyperlinks from functions in REFERENCES_RELATION and
+# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will
+# link to the documentation.
+# The default value is: YES.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the
+# source code will show a tooltip with additional information such as prototype,
+# brief description and links to the definition and documentation. Since this
+# will make the HTML file larger and loading of large files a bit slower, you
+# can opt to disable this feature.
+# The default value is: YES.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+SOURCE_TOOLTIPS = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code will
+# point to the HTML generated by the htags(1) tool instead of doxygen built-in
+# source browser. The htags tool is part of GNU's global source tagging system
+# (see http://www.gnu.org/software/global/global.html). You will need version
+# 4.8.6 or higher.
+#
+# To use it do the following:
+# - Install the latest version of global
+# - Enable SOURCE_BROWSER and USE_HTAGS in the config file
+# - Make sure the INPUT points to the root of the source tree
+# - Run doxygen as normal
+#
+# Doxygen will invoke htags (and that will in turn invoke gtags), so these
+# tools must be available from the command line (i.e. in the search path).
+#
+# The result: instead of the source browser generated by doxygen, the links to
+# source code will now point to the output of htags.
+# The default value is: NO.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+USE_HTAGS = NO
+
+# If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a
+# verbatim copy of the header file for each class for which an include is
+# specified. Set to NO to disable this.
+# See also: Section \class.
+# The default value is: YES.
+
+VERBATIM_HEADERS = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all
+# compounds will be generated. Enable this if the project contains a lot of
+# classes, structs, unions or interfaces.
+# The default value is: YES.
+
+ALPHABETICAL_INDEX = YES
+
+# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
+# which the alphabetical index list will be split.
+# Minimum value: 1, maximum value: 20, default value: 5.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+COLS_IN_ALPHA_INDEX = 1
+
+# In case all classes in a project start with a common prefix, all classes will
+# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
+# can be used to specify a prefix (or a list of prefixes) that should be ignored
+# while generating the index headers.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+IGNORE_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES doxygen will generate HTML output
+# The default value is: YES.
+
+GENERATE_HTML = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_OUTPUT = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each
+# generated HTML page (for example: .htm, .php, .asp).
+# The default value is: .html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FILE_EXTENSION = .html
+
+# The HTML_HEADER tag can be used to specify a user-defined HTML header file for
+# each generated HTML page. If the tag is left blank doxygen will generate a
+# standard header.
+#
+# To get valid HTML the header file that includes any scripts and style sheets
+# that doxygen needs, which is dependent on the configuration options used (e.g.
+# the setting GENERATE_TREEVIEW). It is highly recommended to start with a
+# default header using
+# doxygen -w html new_header.html new_footer.html new_stylesheet.css
+# YourConfigFile
+# and then modify the file new_header.html. See also section "Doxygen usage"
+# for information on how to generate the default header that doxygen normally
+# uses.
+# Note: The header is subject to change so you typically have to regenerate the
+# default header when upgrading to a newer version of doxygen. For a description
+# of the possible markers and block names see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_HEADER =
+
+# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each
+# generated HTML page. If the tag is left blank doxygen will generate a standard
+# footer. See HTML_HEADER for more information on how to generate a default
+# footer and what special commands can be used inside the footer. See also
+# section "Doxygen usage" for information on how to generate the default footer
+# that doxygen normally uses.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FOOTER =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
+# sheet that is used by each HTML page. It can be used to fine-tune the look of
+# the HTML output. If left blank doxygen will generate a default style sheet.
+# See also section "Doxygen usage" for information on how to generate the style
+# sheet that doxygen normally uses.
+# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as
+# it is more robust and this tag (HTML_STYLESHEET) will in the future become
+# obsolete.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_STYLESHEET =
+
+# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional user-
+# defined cascading style sheet that is included after the standard style sheets
+# created by doxygen. Using this option one can overrule certain style aspects.
+# This is preferred over using HTML_STYLESHEET since it does not replace the
+# standard style sheet and is therefor more robust against future updates.
+# Doxygen will copy the style sheet file to the output directory. For an example
+# see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_STYLESHEET = @CMAKE_CURRENT_SOURCE_DIR@/customdoxygen.css
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the HTML output directory. Note
+# that these files will be copied to the base HTML output directory. Use the
+# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that the
+# files will be copied as-is; there are no commands or markers available.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_FILES =
+
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
+# will adjust the colors in the stylesheet and background images according to
+# this color. Hue is specified as an angle on a colorwheel, see
+# http://en.wikipedia.org/wiki/Hue for more information. For instance the value
+# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
+# purple, and 360 is red again.
+# Minimum value: 0, maximum value: 359, default value: 220.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_HUE = 220
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors
+# in the HTML output. For a value of 0 the output will use grayscales only. A
+# value of 255 will produce the most vivid colors.
+# Minimum value: 0, maximum value: 255, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_SAT = 100
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the
+# luminance component of the colors in the HTML output. Values below 100
+# gradually make the output lighter, whereas values above 100 make the output
+# darker. The value divided by 100 is the actual gamma applied, so 80 represents
+# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not
+# change the gamma.
+# Minimum value: 40, maximum value: 240, default value: 80.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_GAMMA = 80
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting this
+# to NO can help when comparing the output of multiple runs.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_TIMESTAMP = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_SECTIONS = NO
+
+# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
+# shown in the various tree structured indices initially; the user can expand
+# and collapse entries dynamically later on. Doxygen will expand the tree to
+# such a level that at most the specified number of entries are visible (unless
+# a fully collapsed tree already exceeds this amount). So setting the number of
+# entries 1 will produce a full collapsed tree by default. 0 is a special value
+# representing an infinite number of entries and will result in a full expanded
+# tree by default.
+# Minimum value: 0, maximum value: 9999, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_INDEX_NUM_ENTRIES = 100
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files will be
+# generated that can be used as input for Apple's Xcode 3 integrated development
+# environment (see: http://developer.apple.com/tools/xcode/), introduced with
+# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a
+# Makefile in the HTML output directory. Running make will produce the docset in
+# that directory and running make install will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
+# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
+# for more information.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_DOCSET = NO
+
+# This tag determines the name of the docset feed. A documentation feed provides
+# an umbrella under which multiple documentation sets from a single provider
+# (such as a company or product suite) can be grouped.
+# The default value is: Doxygen generated docs.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_FEEDNAME = "Doxygen generated docs"
+
+# This tag specifies a string that should uniquely identify the documentation
+# set bundle. This should be a reverse domain-name style string, e.g.
+# com.mycompany.MyDocSet. Doxygen will append .docset to the name.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_BUNDLE_ID = org.doxygen.Project
+
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
+# the documentation publisher. This should be a reverse domain-name style
+# string, e.g. com.mycompany.MyDocSet.documentation.
+# The default value is: org.doxygen.Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_ID = org.doxygen.Publisher
+
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+# The default value is: Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_NAME = Publisher
+
+# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
+# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
+# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
+# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on
+# Windows.
+#
+# The HTML Help Workshop contains a compiler that can convert all HTML output
+# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
+# files are now used as the Windows 98 help format, and will replace the old
+# Windows help format (.hlp) on all Windows platforms in the future. Compressed
+# HTML files also contain an index, a table of contents, and you can search for
+# words in the documentation. The HTML workshop also contains a viewer for
+# compressed HTML files.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_HTMLHELP = NO
+
+# The CHM_FILE tag can be used to specify the file name of the resulting .chm
+# file. You can add a path in front of the file if the result should not be
+# written to the html output directory.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_FILE =
+
+# The HHC_LOCATION tag can be used to specify the location (absolute path
+# including file name) of the HTML help compiler ( hhc.exe). If non-empty
+# doxygen will try to run the HTML help compiler on the generated index.hhp.
+# The file has to be specified with full path.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+HHC_LOCATION =
+
+# The GENERATE_CHI flag controls if a separate .chi index file is generated (
+# YES) or that it should be included in the master .chm file ( NO).
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+GENERATE_CHI = NO
+
+# The CHM_INDEX_ENCODING is used to encode HtmlHelp index ( hhk), content ( hhc)
+# and project file content.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_INDEX_ENCODING =
+
+# The BINARY_TOC flag controls whether a binary table of contents is generated (
+# YES) or a normal table of contents ( NO) in the .chm file.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+BINARY_TOC = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members to
+# the table of contents of the HTML help documentation and to the tree view.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+TOC_EXPAND = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
+# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help
+# (.qch) of the generated HTML documentation.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_QHP = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify
+# the file name of the resulting .qch file. The path specified is relative to
+# the HTML output folder.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QCH_FILE =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
+# Project output. For more information please see Qt Help Project / Namespace
+# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace).
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_NAMESPACE = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
+# Help Project output. For more information please see Qt Help Project / Virtual
+# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual-
+# folders).
+# The default value is: doc.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_VIRTUAL_FOLDER = doc
+
+# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
+# filter to add. For more information please see Qt Help Project / Custom
+# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
+# filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_NAME =
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see Qt Help Project / Custom
+# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
+# filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_ATTRS =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's filter section matches. Qt Help Project / Filter Attributes (see:
+# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_SECT_FILTER_ATTRS =
+
+# The QHG_LOCATION tag can be used to specify the location of Qt's
+# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
+# generated .qhp file.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHG_LOCATION =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be
+# generated, together with the HTML files, they form an Eclipse help plugin. To
+# install this plugin and make it available under the help contents menu in
+# Eclipse, the contents of the directory containing the HTML and XML files needs
+# to be copied into the plugins directory of eclipse. The name of the directory
+# within the plugins directory should be the same as the ECLIPSE_DOC_ID value.
+# After copying Eclipse needs to be restarted before the help appears.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_ECLIPSEHELP = NO
+
+# A unique identifier for the Eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have this
+# name. Each documentation set should have its own identifier.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.
+
+ECLIPSE_DOC_ID = org.doxygen.Project
+
+# If you want full control over the layout of the generated HTML pages it might
+# be necessary to disable the index and replace it with your own. The
+# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top
+# of each HTML page. A value of NO enables the index and the value YES disables
+# it. Since the tabs in the index contain the same information as the navigation
+# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+DISABLE_INDEX = NO
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information. If the tag
+# value is set to YES, a side panel will be generated containing a tree-like
+# index structure (just like the one that is generated for HTML Help). For this
+# to work a browser that supports JavaScript, DHTML, CSS and frames is required
+# (i.e. any modern browser). Windows users are probably better off using the
+# HTML help feature. Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can
+# further fine-tune the look of the index. As an example, the default style
+# sheet generated by doxygen has an example that shows how to put an image at
+# the root of the tree instead of the PROJECT_NAME. Since the tree basically has
+# the same information as the tab index, you could consider setting
+# DISABLE_INDEX to YES when enabling this option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_TREEVIEW = YES
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
+# doxygen will group on one line in the generated HTML documentation.
+#
+# Note that a value of 0 will completely suppress the enum values from appearing
+# in the overview section.
+# Minimum value: 0, maximum value: 20, default value: 4.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+ENUM_VALUES_PER_LINE = 4
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
+# to set the initial width (in pixels) of the frame in which the tree is shown.
+# Minimum value: 0, maximum value: 1500, default value: 250.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+TREEVIEW_WIDTH = 250
+
+# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to
+# external symbols imported via tag files in a separate window.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+EXT_LINKS_IN_WINDOW = NO
+
+# Use this tag to change the font size of LaTeX formulas included as images in
+# the HTML documentation. When you change the font size after a successful
+# doxygen run you need to manually remove any form_*.png images from the HTML
+# output directory to force them to be regenerated.
+# Minimum value: 8, maximum value: 50, default value: 10.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_FONTSIZE = 10
+
+# Use the FORMULA_TRANPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are not
+# supported properly for IE 6.0, but are supported on all modern browsers.
+#
+# Note that when changing this option you need to delete any form_*.png files in
+# the HTML output directory before the changes have effect.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_TRANSPARENT = YES
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
+# http://www.mathjax.org) which uses client side Javascript for the rendering
+# instead of using prerendered bitmaps. Use this if you do not have LaTeX
+# installed or if you want to formulas look prettier in the HTML output. When
+# enabled you may also need to install MathJax separately and configure the path
+# to it using the MATHJAX_RELPATH option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+USE_MATHJAX = NO
+
+# When MathJax is enabled you can set the default output format to be used for
+# the MathJax output. See the MathJax site (see:
+# http://docs.mathjax.org/en/latest/output.html) for more details.
+# Possible values are: HTML-CSS (which is slower, but has the best
+# compatibility), NativeMML (i.e. MathML) and SVG.
+# The default value is: HTML-CSS.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_FORMAT = HTML-CSS
+
+# When MathJax is enabled you need to specify the location relative to the HTML
+# output directory using the MATHJAX_RELPATH option. The destination directory
+# should contain the MathJax.js script. For instance, if the mathjax directory
+# is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
+# Content Delivery Network so you can quickly see the result without installing
+# MathJax. However, it is strongly recommended to install a local copy of
+# MathJax from http://www.mathjax.org before deployment.
+# The default value is: http://cdn.mathjax.org/mathjax/latest.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest
+
+# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
+# extension names that should be enabled during MathJax rendering. For example
+# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_EXTENSIONS =
+
+# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
+# of code that will be used on startup of the MathJax code. See the MathJax site
+# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
+# example see the documentation.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_CODEFILE =
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
+# the HTML output. The underlying search engine uses javascript and DHTML and
+# should work on any modern browser. Note that when using HTML help
+# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
+# there is already a search function so this one should typically be disabled.
+# For large projects the javascript based search engine can be slow, then
+# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to
+# search using the keyboard; to jump to the search box use <access key> + S
+# (what the <access key> is depends on the OS and browser, but it is typically
+# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down
+# key> to jump into the search results window, the results can be navigated
+# using the <cursor keys>. Press <Enter> to select an item or <escape> to cancel
+# the search. The filter options can be selected when the cursor is inside the
+# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys>
+# to select a filter and <Enter> or <escape> to activate or cancel the filter
+# option.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+SEARCHENGINE = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a web server instead of a web client using Javascript. There
+# are two flavours of web server based searching depending on the
+# EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for
+# searching and an index file used by the script. When EXTERNAL_SEARCH is
+# enabled the indexing and searching needs to be provided by external tools. See
+# the section "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SERVER_BASED_SEARCH = NO
+
+# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP
+# script for searching. Instead the search results are written to an XML file
+# which needs to be processed by an external indexer. Doxygen will invoke an
+# external search engine pointed to by the SEARCHENGINE_URL option to obtain the
+# search results.
+#
+# Doxygen ships with an example indexer ( doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see: http://xapian.org/).
+#
+# See the section "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH = NO
+
+# The SEARCHENGINE_URL should point to a search engine hosted by a web server
+# which will return the search results when EXTERNAL_SEARCH is enabled.
+#
+# Doxygen ships with an example indexer ( doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see: http://xapian.org/). See the section "External Indexing and
+# Searching" for details.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHENGINE_URL =
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed
+# search data is written to a file for indexing by an external tool. With the
+# SEARCHDATA_FILE tag the name of this file can be specified.
+# The default file is: searchdata.xml.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHDATA_FILE = searchdata.xml
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the
+# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is
+# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple
+# projects and redirect the results back to the right project.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH_ID =
+
+# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen
+# projects other than the one defined by this configuration file, but that are
+# all added to the same external search index. Each project needs to have a
+# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id of
+# to a relative location where the documentation can be found. The format is:
+# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ...
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTRA_SEARCH_MAPPINGS =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES doxygen will generate LaTeX output.
+# The default value is: YES.
+
+GENERATE_LATEX = YES
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: latex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_OUTPUT = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked.
+#
+# Note that when enabling USE_PDFLATEX this option is only used for generating
+# bitmaps for formulas in the HTML output, but not in the Makefile that is
+# written to the output directory.
+# The default file is: latex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate
+# index for LaTeX.
+# The default file is: makeindex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES doxygen generates more compact LaTeX
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used by the
+# printer.
+# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x
+# 14 inches) and executive (7.25 x 10.5 inches).
+# The default value is: a4.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PAPER_TYPE = a4
+
+# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
+# that should be included in the LaTeX output. To get the times font for
+# instance you can specify
+# EXTRA_PACKAGES=times
+# If left blank no extra packages will be included.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+EXTRA_PACKAGES =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the
+# generated LaTeX document. The header should contain everything until the first
+# chapter. If it is left blank doxygen will generate a standard header. See
+# section "Doxygen usage" for information on how to let doxygen write the
+# default header to a separate file.
+#
+# Note: Only use a user-defined header if you know what you are doing! The
+# following commands have a special meaning inside the header: $title,
+# $datetime, $date, $doxygenversion, $projectname, $projectnumber. Doxygen will
+# replace them by respectively the title of the page, the current date and time,
+# only the current date, the version number of doxygen, the project name (see
+# PROJECT_NAME), or the project number (see PROJECT_NUMBER).
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HEADER =
+
+# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the
+# generated LaTeX document. The footer should contain everything after the last
+# chapter. If it is left blank doxygen will generate a standard footer.
+#
+# Note: Only use a user-defined footer if you know what you are doing!
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_FOOTER =
+
+# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the LATEX_OUTPUT output
+# directory. Note that the files will be copied as-is; there are no commands or
+# markers available.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_FILES =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is
+# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will
+# contain links (just like the HTML output) instead of page references. This
+# makes the output suitable for online browsing using a PDF viewer.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PDF_HYPERLINKS = YES
+
+# If the LATEX_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
+# the PDF file directly from the LaTeX files. Set this option to YES to get a
+# higher quality PDF documentation.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+USE_PDFLATEX = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep running
+# if errors occur, instead of asking the user for help. This option is also used
+# when generating formulas in HTML.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BATCHMODE = NO
+
+# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the
+# index chapters (such as File Index, Compound Index, etc.) in the output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HIDE_INDICES = NO
+
+# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source
+# code with syntax highlighting in the LaTeX output.
+#
+# Note that which sources are shown also depends on other settings such as
+# SOURCE_BROWSER.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_SOURCE_CODE = NO
+
+# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
+# bibliography, e.g. plainnat, or ieeetr. See
+# http://en.wikipedia.org/wiki/BibTeX and \cite for more info.
+# The default value is: plain.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BIB_STYLE = plain
+
+#---------------------------------------------------------------------------
+# Configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES doxygen will generate RTF output. The
+# RTF output is optimized for Word 97 and may not look too pretty with other RTF
+# readers/editors.
+# The default value is: NO.
+
+GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: rtf.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_OUTPUT = rtf
+
+# If the COMPACT_RTF tag is set to YES doxygen generates more compact RTF
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will
+# contain hyperlink fields. The RTF file will contain links (just like the HTML
+# output) instead of page references. This makes the output suitable for online
+# browsing using Word or some other Word compatible readers that support those
+# fields.
+#
+# Note: WordPad (write) and others do not support links.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_HYPERLINKS = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's config
+# file, i.e. a series of assignments. You only have to provide replacements,
+# missing definitions are set to their default value.
+#
+# See also section "Doxygen usage" for information on how to generate the
+# default style sheet that doxygen normally uses.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_STYLESHEET_FILE =
+
+# Set optional variables used in the generation of an RTF document. Syntax is
+# similar to doxygen's config file. A template extensions file can be generated
+# using doxygen -e rtf extensionFile.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_EXTENSIONS_FILE =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES doxygen will generate man pages for
+# classes and files.
+# The default value is: NO.
+
+GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it. A directory man3 will be created inside the directory specified by
+# MAN_OUTPUT.
+# The default directory is: man.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_OUTPUT = man
+
+# The MAN_EXTENSION tag determines the extension that is added to the generated
+# man pages. In case the manual section does not start with a number, the number
+# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is
+# optional.
+# The default value is: .3.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_EXTENSION = .3
+
+# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it
+# will generate one additional man file for each entity documented in the real
+# man page(s). These additional files only source the real man page, but without
+# them the man command would be unable to find the correct page.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_LINKS = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES doxygen will generate an XML file that
+# captures the structure of the code including all documentation.
+# The default value is: NO.
+
+GENERATE_XML = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: xml.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_OUTPUT = xml
+
+# The XML_SCHEMA tag can be used to specify a XML schema, which can be used by a
+# validating XML parser to check the syntax of the XML files.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_SCHEMA =
+
+# The XML_DTD tag can be used to specify a XML DTD, which can be used by a
+# validating XML parser to check the syntax of the XML files.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_DTD =
+
+# If the XML_PROGRAMLISTING tag is set to YES doxygen will dump the program
+# listings (including syntax highlighting and cross-referencing information) to
+# the XML output. Note that enabling this will significantly increase the size
+# of the XML output.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_PROGRAMLISTING = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the DOCBOOK output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_DOCBOOK tag is set to YES doxygen will generate Docbook files
+# that can be used to generate PDF.
+# The default value is: NO.
+
+GENERATE_DOCBOOK = NO
+
+# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in
+# front of it.
+# The default directory is: docbook.
+# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
+
+DOCBOOK_OUTPUT = docbook
+
+#---------------------------------------------------------------------------
+# Configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES doxygen will generate an AutoGen
+# Definitions (see http://autogen.sf.net) file that captures the structure of
+# the code including all documentation. Note that this feature is still
+# experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_AUTOGEN_DEF = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES doxygen will generate a Perl module
+# file that captures the structure of the code including all documentation.
+#
+# Note that this feature is still experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_PERLMOD = NO
+
+# If the PERLMOD_LATEX tag is set to YES doxygen will generate the necessary
+# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI
+# output from the Perl module output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_LATEX = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be nicely
+# formatted so it can be parsed by a human reader. This is useful if you want to
+# understand what is going on. On the other hand, if this tag is set to NO the
+# size of the Perl module output will be much smaller and Perl will parse it
+# just the same.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_PRETTY = YES
+
+# The names of the make variables in the generated doxyrules.make file are
+# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful
+# so different doxyrules.make files included by the same Makefile don't
+# overwrite each other's variables.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES doxygen will evaluate all
+# C-preprocessor directives found in the sources and include files.
+# The default value is: YES.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES doxygen will expand all macro names
+# in the source code. If set to NO only conditional compilation will be
+# performed. Macro expansion can be done in a controlled way by setting
+# EXPAND_ONLY_PREDEF to YES.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
+# the macro expansion is limited to the macros specified with the PREDEFINED and
+# EXPAND_AS_DEFINED tags.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES the includes files in the
+# INCLUDE_PATH will be searched if a #include is found.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by the
+# preprocessor.
+# This tag requires that the tag SEARCH_INCLUDES is set to YES.
+
+INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will be
+# used.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that are
+# defined before the preprocessor is started (similar to the -D option of e.g.
+# gcc). The argument of the tag is a list of macros of the form: name or
+# name=definition (no spaces). If the definition and the "=" are omitted, "=1"
+# is assumed. To prevent a macro definition from being undefined via #undef or
+# recursively expanded use the := operator instead of the = operator.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+PREDEFINED =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
+# tag can be used to specify a list of macro names that should be expanded. The
+# macro definition that is found in the sources will be used. Use the PREDEFINED
+# tag if you want to use a different macro definition that overrules the
+# definition found in the source code.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
+# remove all refrences to function-like macros that are alone on a line, have an
+# all uppercase name, and do not end with a semicolon. Such function macros are
+# typically used for boiler-plate code, and will confuse the parser if not
+# removed.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SKIP_FUNCTION_MACROS = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES tag can be used to specify one or more tag files. For each tag
+# file the location of the external documentation should be added. The format of
+# a tag file without this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where loc1 and loc2 can be relative or absolute paths or URLs. See the
+# section "Linking to external documentation" for more information about the use
+# of tag files.
+# Note: Each tag file must have an unique name (where the name does NOT include
+# the path). If a tag file is not located in the directory in which doxygen is
+# run, you must also specify the path to the tagfile here.
+
+TAGFILES =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create a
+# tag file that is based on the input files it reads. See section "Linking to
+# external documentation" for more information about the usage of tag files.
+
+GENERATE_TAGFILE =
+
+# If the ALLEXTERNALS tag is set to YES all external class will be listed in the
+# class index. If set to NO only the inherited external classes will be listed.
+# The default value is: NO.
+
+ALLEXTERNALS = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed in
+# the modules index. If set to NO, only the current project's groups will be
+# listed.
+# The default value is: YES.
+
+EXTERNAL_GROUPS = YES
+
+# If the EXTERNAL_PAGES tag is set to YES all external pages will be listed in
+# the related pages index. If set to NO, only the current project's pages will
+# be listed.
+# The default value is: YES.
+
+EXTERNAL_PAGES = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of 'which perl').
+# The default file (with absolute path) is: /usr/bin/perl.
+
+PERL_PATH = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES doxygen will generate a class diagram
+# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to
+# NO turns the diagrams off. Note that this option also works with HAVE_DOT
+# disabled, but it is recommended to install and use dot, since it yields more
+# powerful graphs.
+# The default value is: YES.
+
+CLASS_DIAGRAMS = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see:
+# http://www.mcternan.me.uk/mscgen/)) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH =
+
+# You can include diagrams made with dia in doxygen documentation. Doxygen will
+# then run dia to produce the diagram and insert it in the documentation. The
+# DIA_PATH tag allows you to specify the directory where the dia binary resides.
+# If left empty dia is assumed to be found in the default search path.
+
+DIA_PATH =
+
+# If set to YES, the inheritance and collaboration graphs will hide inheritance
+# and usage relations if the target is undocumented or is not a class.
+# The default value is: YES.
+
+HIDE_UNDOC_RELATIONS = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz (see:
+# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent
+# Bell Labs. The other options in this section have no effect if this option is
+# set to NO
+# The default value is: NO.
+
+HAVE_DOT = NO
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed
+# to run in parallel. When set to 0 doxygen will base this on the number of
+# processors available in the system. You can set it explicitly to a value
+# larger than 0 to get control over the balance between CPU load and processing
+# speed.
+# Minimum value: 0, maximum value: 32, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_NUM_THREADS = 0
+
+# When you want a differently looking font n the dot files that doxygen
+# generates you can specify the font name using DOT_FONTNAME. You need to make
+# sure dot is able to find the font, which can be done by putting it in a
+# standard location or by setting the DOTFONTPATH environment variable or by
+# setting DOT_FONTPATH to the directory containing the font.
+# The default value is: Helvetica.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTNAME = Helvetica
+
+# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of
+# dot graphs.
+# Minimum value: 4, maximum value: 24, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTSIZE = 10
+
+# By default doxygen will tell dot to use the default font as specified with
+# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set
+# the path where dot can find it using this tag.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTPATH =
+
+# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for
+# each documented class showing the direct and indirect inheritance relations.
+# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
+# graph for each documented class showing the direct and indirect implementation
+# dependencies (inheritance, containment, and class references variables) of the
+# class with other documented classes.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+COLLABORATION_GRAPH = YES
+
+# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for
+# groups, showing the direct groups dependencies.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GROUP_GRAPHS = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LOOK = NO
+
+# If the UML_LOOK tag is enabled, the fields and methods are shown inside the
+# class node. If there are many fields or methods and many nodes the graph may
+# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the
+# number of items for each type to make the size more manageable. Set this to 0
+# for no limit. Note that the threshold may be exceeded by 50% before the limit
+# is enforced. So when you set the threshold to 10, up to 15 fields may appear,
+# but if the number exceeds 15, the total amount of fields shown is limited to
+# 10.
+# Minimum value: 0, maximum value: 100, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LIMIT_NUM_FIELDS = 10
+
+# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
+# collaboration graphs will show the relations between templates and their
+# instances.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+TEMPLATE_RELATIONS = NO
+
+# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
+# YES then doxygen will generate a graph for each documented file showing the
+# direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDE_GRAPH = YES
+
+# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
+# set to YES then doxygen will generate a graph for each documented file showing
+# the direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDED_BY_GRAPH = YES
+
+# If the CALL_GRAPH tag is set to YES then doxygen will generate a call
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable call graphs for selected
+# functions only using the \callgraph command.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALL_GRAPH = NO
+
+# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable caller graphs for selected
+# functions only using the \callergraph command.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALLER_GRAPH = NO
+
+# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will graphical
+# hierarchy of all classes instead of a textual one.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GRAPHICAL_HIERARCHY = YES
+
+# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the
+# dependencies a directory has on other directories in a graphical way. The
+# dependency relations are determined by the #include relations between the
+# files in the directories.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DIRECTORY_GRAPH = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot.
+# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order
+# to make the SVG files visible in IE 9+ (other browsers do not have this
+# requirement).
+# Possible values are: png, jpg, gif and svg.
+# The default value is: png.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_IMAGE_FORMAT = png
+
+# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
+# enable generation of interactive SVG images that allow zooming and panning.
+#
+# Note that this requires a modern browser other than Internet Explorer. Tested
+# and working are Firefox, Chrome, Safari, and Opera.
+# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make
+# the SVG files visible. Older versions of IE do not have SVG support.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INTERACTIVE_SVG = NO
+
+# The DOT_PATH tag can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_PATH =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the \dotfile
+# command).
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOTFILE_DIRS =
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the \mscfile
+# command).
+
+MSCFILE_DIRS =
+
+# The DIAFILE_DIRS tag can be used to specify one or more directories that
+# contain dia files that are included in the documentation (see the \diafile
+# command).
+
+DIAFILE_DIRS =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes
+# that will be shown in the graph. If the number of nodes in a graph becomes
+# larger than this value, doxygen will truncate the graph, which is visualized
+# by representing a node as a red box. Note that doxygen if the number of direct
+# children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note that
+# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+# Minimum value: 0, maximum value: 10000, default value: 50.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_GRAPH_MAX_NODES = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs
+# generated by dot. A depth value of 3 means that only nodes reachable from the
+# root by following a path via at most 3 edges will be shown. Nodes that lay
+# further from the root node will be omitted. Note that setting this option to 1
+# or 2 may greatly reduce the computation time needed for large code bases. Also
+# note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+# Minimum value: 0, maximum value: 1000, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+MAX_DOT_GRAPH_DEPTH = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not seem
+# to support this out of the box.
+#
+# Warning: Depending on the platform used, enabling this option may lead to
+# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
+# read).
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_TRANSPARENT = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10) support
+# this, this feature is disabled by default.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_MULTI_TARGETS = YES
+
+# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page
+# explaining the meaning of the various boxes and arrows in the dot generated
+# graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GENERATE_LEGEND = YES
+
+# If the DOT_CLEANUP tag is set to YES doxygen will remove the intermediate dot
+# files that are used to generate the various graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_CLEANUP = YES
--- /dev/null
+==============================================================================
+LLVM Release License
+==============================================================================
+University of Illinois/NCSA
+Open Source License
+
+Copyright (c) 2007-2016 University of Illinois at Urbana-Champaign.
+All rights reserved.
+
+Developed by:
+
+ LLVM Team
+
+ University of Illinois at Urbana-Champaign
+
+ http://llvm.org
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal with
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimers.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimers in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the names of the LLVM Team, University of Illinois at
+ Urbana-Champaign, nor the names of its contributors may be used to
+ endorse or promote products derived from this Software without specific
+ prior written permission.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+SOFTWARE.
+
+==============================================================================
+The LLVM software contains code written by third parties. Such software will
+have its own individual LICENSE.TXT file in the directory in which it appears.
+This file will describe the copyrights, license, and restrictions which apply
+to that code.
+
+The disclaimer of warranty in the University of Illinois Open Source License
+applies to all code in the LLVM Distribution, and nothing in any of the
+other licenses gives permission to use the names of the LLVM Team or the
+University of Illinois to endorse or promote products derived from this
+Software.
+
+The following pieces of software have additional or alternate copyrights,
+licenses, and/or restrictions:
+
+Program Directory
+------- ---------
+<none yet>
+
--- /dev/null
+//===--- acxxel.cpp - Implementation details for the Acxxel API -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "acxxel.h"
+#include "config.h"
+
+#include <algorithm>
+#include <iostream>
+#include <string>
+
+namespace acxxel {
+
+namespace cuda {
+Expected<Platform *> getPlatform();
+} // namespace cuda
+
+namespace opencl {
+Expected<Platform *> getPlatform();
+} // namespace opencl
+
+void logWarning(const std::string &Message) {
+ std::cerr << "WARNING: " << Message << "\n";
+}
+
+Expected<Platform *> getCUDAPlatform() {
+#ifdef ACXXEL_ENABLE_CUDA
+ return cuda::getPlatform();
+#else
+ return Status("library was build without CUDA support");
+#endif
+}
+
+Expected<Platform *> getOpenCLPlatform() {
+#ifdef ACXXEL_ENABLE_OPENCL
+ return opencl::getPlatform();
+#else
+ return Status("library was build without OpenCL support");
+#endif
+}
+
+Stream::Stream(Stream &&) noexcept = default;
+Stream &Stream::operator=(Stream &&) noexcept = default;
+
+Status Stream::sync() {
+ return takeStatusOr(ThePlatform->streamSync(TheHandle.get()));
+}
+
+Status Stream::waitOnEvent(Event &Event) {
+ return takeStatusOr(ThePlatform->streamWaitOnEvent(
+ TheHandle.get(), ThePlatform->getEventHandle(Event)));
+}
+
+Stream &
+Stream::addCallback(std::function<void(Stream &, const Status &)> Callback) {
+ setStatus(ThePlatform->addStreamCallback(*this, std::move(Callback)));
+ return *this;
+}
+
+Stream &Stream::asyncKernelLaunch(const Kernel &TheKernel,
+ KernelLaunchDimensions LaunchDimensions,
+ Span<void *> Arguments,
+ Span<size_t> ArgumentSizes,
+ size_t SharedMemoryBytes) {
+ setStatus(ThePlatform->rawEnqueueKernelLaunch(
+ TheHandle.get(), TheKernel.TheHandle.get(), LaunchDimensions, Arguments,
+ ArgumentSizes, SharedMemoryBytes));
+ return *this;
+}
+
+Stream &Stream::enqueueEvent(Event &E) {
+ setStatus(ThePlatform->enqueueEvent(ThePlatform->getEventHandle(E),
+ TheHandle.get()));
+ return *this;
+}
+
+Event::Event(Event &&) noexcept = default;
+Event &Event::operator=(Event &&) noexcept = default;
+
+bool Event::isDone() { return ThePlatform->eventIsDone(TheHandle.get()); }
+
+Status Event::sync() { return ThePlatform->eventSync(TheHandle.get()); }
+
+Expected<float> Event::getSecondsSince(const Event &Previous) {
+ Expected<float> MaybeSeconds = ThePlatform->getSecondsBetweenEvents(
+ Previous.TheHandle.get(), TheHandle.get());
+ if (MaybeSeconds.isError())
+ MaybeSeconds.getError();
+ return MaybeSeconds;
+}
+
+Expected<Kernel> Program::createKernel(const std::string &Name) {
+ Expected<void *> MaybeKernelHandle =
+ ThePlatform->rawCreateKernel(TheHandle.get(), Name);
+ if (MaybeKernelHandle.isError())
+ return MaybeKernelHandle.getError();
+ return Kernel(ThePlatform, MaybeKernelHandle.getValue(),
+ ThePlatform->getKernelHandleDestructor());
+}
+
+Program::Program(Program &&) noexcept = default;
+Program &Program::operator=(Program &&That) noexcept = default;
+
+Kernel::Kernel(Kernel &&) noexcept = default;
+Kernel &Kernel::operator=(Kernel &&That) noexcept = default;
+
+} // namespace acxxel
--- /dev/null
+//===--- acxxel.h - The Acxxel API ------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+/// \mainpage Welcome to Acxxel
+///
+/// \section Introduction
+///
+/// \b Acxxel is a library providing a modern C++ interface for managing
+/// accelerator devices such as GPUs. Acxxel handles operations such as
+/// allocating device memory, copying data to and from device memory, creating
+/// and managing device events, and creating and managing device streams.
+///
+/// \subsection ExampleUsage Example Usage
+///
+/// Below is some example code to show you the basics of Acxxel.
+///
+/// \snippet examples/simple_example.cu Example simple saxpy
+///
+/// The above code could be compiled with either `clang` or `nvcc`. Compare this
+/// with the standard CUDA runtime library code to perform these same
+/// operations:
+///
+/// \snippet examples/simple_example.cu Example CUDA simple saxpy
+///
+/// Notice that the CUDA runtime calls are not type safe. For example, if you
+/// change the type of the inputs from `float` to `double`, you have to remember
+/// to change the size calculation. If you forget, you will get garbage output
+/// data. In the Acxxel example, you would instead get a helpful compile-time
+/// error that wouldn't let you forget to change the types inside the function.
+///
+/// The Acxxel example also automatically uses the right sizes for memory
+/// copies, so you don't have to worry about computing the sizes yourself.
+///
+/// The CUDA runtime interface makes it easy to get the source and destination
+/// mixed up in a call to `cudaMemcpy`. If you pass the pointers in the wrong
+/// order or pass the wrong enum value for the direction parameter, you won't
+/// find out until runtime (if you remembered to check the error return value of
+/// `cudaMemcpy`). In Acxxel there is no verbose direction enum because the name
+/// of the function says which way the copy goes, and mixing up the order of
+/// source and destination is a compile-time error.
+///
+/// The CUDA runtime interface makes you clean up your device memory by calling
+/// `cudaFree` for each call to `cudaMalloc`. In Acxxel, you don't have to worry
+/// about that because the memory cleans itself up when it goes out of scope.
+///
+/// \subsection AcxxelFeatures Acxxel Features
+///
+/// Acxxel provides many nice features compared to the C-like interfaces, such
+/// as the CUDA runtime API, which are normally used for the host code in
+/// applications using accelerators.
+///
+/// \subsubsection TypeSafety Type safety
+///
+/// Most errors involving mixing up types, sources and destinations, or host and
+/// device memory result in helpful compile-time errors.
+///
+/// \subsubsection NoCopySizes No need to specify sizes for memory copies
+///
+/// When the arguments to copy functions such as acxxel::Platform::copyHToD know
+/// their sizes (e.g std::array, std::vector, and C-style arrays), there is no
+/// need to specify the amount of memory to copy; Acxxel will just copy the
+/// whole thing. Of course the copy functions also have overloads that accept an
+/// element count for those times when you don't want to copy everything.
+///
+/// \subsubsection MemoryCleanup Automatic memory cleanup
+///
+/// Device memory allocated with acxxel::Platform::mallocD is automatically
+/// freed when it goes out of scope.
+///
+/// \subsubsection NiceErrorHandling Error handling
+///
+/// Operations that would normally return values return acxxel::Expected obects
+/// in Acxxel. These `Expected` objects contain either a value or an error
+/// message explaining why the value is not present. This reminds the user to
+/// check for errors, but also allows them to opt-out easily be calling the
+/// acxxel::Expected::getValue or acxxel::Expected::takeValue methods. The
+/// `getValue` method returns a reference to the value, leaving the `Expected`
+/// instance as the value owner, whereas the `takeValue` method moves the value
+/// out of the `Expected` object and transfers ownership to the caller.
+///
+/// \subsubsection PlatformIndependence Platform independence
+///
+/// Acxxel code works not only with CUDA, but also with any other platform that
+/// can support its interface. For example, Acxxel supports OpenCL. The
+/// acxxel::getCUDAPlatform and acxxel::getOpenCLPlatform functions are provided
+/// to allow easy access to the built-in CUDA and OpenCL platforms. Other
+/// platforms can be created by implementing the acxxel::Platform interface, and
+/// instances of those classes can be created directly.
+///
+/// \subsubsection CUDAInterop Seamless interoperation with CUDA
+///
+/// Acxxel functions as a modern replacement for the standard CUDA runtime
+/// library and interoperates seamlessly with kernel calls.
+
+#ifndef ACXXEL_ACXXEL_H
+#define ACXXEL_ACXXEL_H
+
+#include "span.h"
+#include "status.h"
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <type_traits>
+
+#if defined(__clang__) || defined(__GNUC__)
+#define ACXXEL_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
+#else
+#define ACXXEL_WARN_UNUSED_RESULT
+#endif
+
+/// This type is declared here to provide smooth interoperability with the CUDA
+/// triple-chevron kernel launch syntax.
+///
+/// A acxxel::Stream instance will be implicitly convertible to a CUstream_st*,
+/// which is the type expected for the stream argument in the triple-chevron
+/// CUDA kernel launch. This means that a acxxel::Stream can be passed without
+/// explicit casting as the fourth argument to a triple-chevron CUDA kernel
+/// launch.
+struct CUstream_st; // NOLINT
+
+namespace acxxel {
+
+class Event;
+class Platform;
+class Stream;
+
+template <typename T> class DeviceMemory;
+
+template <typename T> class DeviceMemorySpan;
+
+template <typename T> class AsyncHostMemory;
+
+template <typename T> class AsyncHostMemorySpan;
+
+template <typename T> class OwnedAsyncHostMemory;
+
+/// Function type used to destroy opaque handles given out by the platform.
+using HandleDestructor = void (*)(void *);
+
+/// Functor type for enqueuing host callbacks on a stream.
+using StreamCallback = std::function<void(Stream &, const Status &)>;
+
+struct KernelLaunchDimensions {
+ // Intentionally implicit
+ KernelLaunchDimensions(unsigned int BlockX = 1, unsigned int BlockY = 1,
+ unsigned int BlockZ = 1, unsigned int GridX = 1,
+ unsigned int GridY = 1, unsigned int GridZ = 1)
+ : BlockX(BlockX), BlockY(BlockY), BlockZ(BlockZ), GridX(GridX),
+ GridY(GridY), GridZ(GridZ) {}
+
+ unsigned int BlockX;
+ unsigned int BlockY;
+ unsigned int BlockZ;
+ unsigned int GridX;
+ unsigned int GridY;
+ unsigned int GridZ;
+};
+
+/// Logs a warning message.
+void logWarning(const std::string &Message);
+
+/// Gets a pointer to the standard CUDA platform.
+Expected<Platform *> getCUDAPlatform();
+
+/// Gets a pointer to the standard OpenCL platform.
+Expected<Platform *> getOpenCLPlatform();
+
+/// A function that can be executed on the device.
+///
+/// A Kernel is created from a Program by calling Program::createKernel, and a
+/// kernel is enqueued into a Stream by calling Stream::asyncKernelLaunch.
+class Kernel {
+public:
+ Kernel(const Kernel &) = delete;
+ Kernel &operator=(const Kernel &) = delete;
+ Kernel(Kernel &&) noexcept;
+ Kernel &operator=(Kernel &&That) noexcept;
+ ~Kernel() = default;
+
+private:
+ // Only a Program can make a kernel.
+ friend class Program;
+ Kernel(Platform *APlatform, void *AHandle, HandleDestructor Destructor)
+ : ThePlatform(APlatform), TheHandle(AHandle, Destructor) {}
+
+ // Let stream get raw handle for kernel launches.
+ friend class Stream;
+
+ Platform *ThePlatform;
+ std::unique_ptr<void, HandleDestructor> TheHandle;
+};
+
+/// A program loaded on a device.
+///
+/// A program can be created by calling Platform::createProgramFromSource, and a
+/// Kernel can be created from a program by running Program::createKernel.
+///
+/// A program can contain any number of kernels, and a program only needs to be
+/// loaded once in order to use all its kernels.
+class Program {
+public:
+ Program(const Program &) = delete;
+ Program &operator=(const Program &) = delete;
+ Program(Program &&) noexcept;
+ Program &operator=(Program &&That) noexcept;
+ ~Program() = default;
+
+ Expected<Kernel> createKernel(const std::string &Name);
+
+private:
+ // Only a platform can make a program.
+ friend class Platform;
+ Program(Platform *APlatform, void *AHandle, HandleDestructor Destructor)
+ : ThePlatform(APlatform), TheHandle(AHandle, Destructor) {}
+
+ Platform *ThePlatform;
+ std::unique_ptr<void, HandleDestructor> TheHandle;
+};
+
+/// A stream of computation.
+///
+/// All operations enqueued on a Stream are serialized, but operations enqueued
+/// on different Streams may run concurrently.
+///
+/// Each Platform has a notion of the currently active device on a particular
+/// thread (see Platform::getActiveDeviceForThread and
+/// Platform::setActiveDeviceForThread). Each Stream is associated with a
+/// specific, fixed device, set to the current thread's active device when the
+/// Stream is created. Whenver a thread enqueues commands onto a Stream, its
+/// active device must match the Stream's device.
+class Stream {
+public:
+ Stream(const Stream &) = delete;
+ Stream &operator=(const Stream &) = delete;
+ Stream(Stream &&) noexcept;
+ Stream &operator=(Stream &&) noexcept;
+ ~Stream() = default;
+
+ /// Gets the index of the device on which this Stream operates.
+ int getDeviceIndex() { return TheDeviceIndex; }
+
+ /// Blocks the host until the Stream is done executing all previously enqueued
+ /// work.
+ ///
+ /// Returns a Status for any errors emitted by the asynchronous work on the
+ /// Stream, or by any error in the synchronization process itself. Clears the
+ /// Status state of the stream.
+ Status sync() ACXXEL_WARN_UNUSED_RESULT;
+
+ /// Makes all future work submitted to this stream wait until the event
+ /// reports completion.
+ ///
+ /// This is useful because the event argument may be recorded on a different
+ /// stream, so this method allows for synchronization between streams without
+ /// synchronizing all streams.
+ ///
+ /// Returns a Status for any errors emitted by the asynchronous work on the
+ /// Stream, or by any error in the synchronization process itself. Clears the
+ /// Status state of the stream.
+ Status waitOnEvent(Event &Event) ACXXEL_WARN_UNUSED_RESULT;
+
+ /// Adds a host callback function to the stream.
+ ///
+ /// The callback will be called on the host after all previously enqueued work
+ /// on the stream is complete, and no work enqueued after the callback will
+ /// begin until after the callback has finished.
+ Stream &addCallback(std::function<void(Stream &, const Status &)> Callback);
+
+ /// \name Asynchronous device memory copies.
+ ///
+ /// These functions enqueue asynchronous memory copy operations into the
+ /// stream. Only async host memory is allowed for host arguments to these
+ /// functions. Async host memory can be created from normal host memory by
+ /// registering it with Platform::registerHostMem. AsyncHostMemory can also be
+ /// allocated directly by calling Platform::newAsyncHostMem.
+ ///
+ /// For all these functions, DeviceSrcTy must be convertible to
+ /// DeviceMemorySpan<const T>, DeviceDstTy must be convertible to
+ /// DeviceMemorySpan<T>, HostSrcTy must be convertible to
+ /// AsyncHostMemorySpan<const T> and HostDstTy must be convertible to
+ /// AsyncHostMemorySpan<T>. Additionally, the T types must match for the
+ /// destination and source.
+ /// \{
+
+ /// Copies from device memory to device memory.
+ template <typename DeviceSrcTy, typename DeviceDstTy>
+ Stream &asyncCopyDToD(DeviceSrcTy &&DeviceSrc, DeviceDstTy &&DeviceDst);
+
+ /// Copies from device memory to device memory with a given element count.
+ template <typename DeviceSrcTy, typename DeviceDstTy>
+ Stream &asyncCopyDToD(DeviceSrcTy &&DeviceSrc, DeviceDstTy &&DeviceDst,
+ ptrdiff_t ElementCount);
+
+ /// Copies from device memory to host memory.
+ template <typename DeviceSrcTy, typename HostDstTy>
+ Stream &asyncCopyDToH(DeviceSrcTy &&DeviceSrc, HostDstTy &&HostDst);
+
+ /// Copies from device memory to host memory with a given element count.
+ template <typename DeviceSrcTy, typename HostDstTy>
+ Stream &asyncCopyDToH(DeviceSrcTy &&DeviceSrc, HostDstTy &&HostDst,
+ ptrdiff_t ElementCount);
+
+ /// Copies from host memory to device memory.
+ template <typename HostSrcTy, typename DeviceDstTy>
+ Stream &asyncCopyHToD(HostSrcTy &&HostSrc, DeviceDstTy &&DeviceDst);
+
+ /// Copies from host memory to device memory with a given element count.
+ template <typename HostSrcTy, typename DeviceDstTy>
+ Stream &asyncCopyHToD(HostSrcTy &&HostSrc, DeviceDstTy &DeviceDst,
+ ptrdiff_t ElementCount);
+
+ /// \}
+
+ /// \name Stream-synchronous device memory copies
+ ///
+ /// These functions block the host until the copy and all previously-enqueued
+ /// work on the stream has completed.
+ ///
+ /// For all these functions, DeviceSrcTy must be convertible to
+ /// DeviceMemorySpan<const T>, DeviceDstTy must be convertible to
+ /// DeviceMemorySpan<T>, HostSrcTy must be convertible to Span<const T> and
+ /// HostDstTy must be convertible to Span<T>. Additionally, the T types must
+ /// match for the destination and source.
+ /// \{
+
+ template <typename DeviceSrcTy, typename DeviceDstTy>
+ Stream &syncCopyDToD(DeviceSrcTy &&DeviceSrc, DeviceDstTy &&DeviceDst);
+
+ template <typename DeviceSrcTy, typename DeviceDstTy>
+ Stream &syncCopyDToD(DeviceSrcTy &&DeviceSrc, DeviceDstTy &&DeviceDst,
+ ptrdiff_t ElementCount);
+
+ template <typename DeviceSrcTy, typename HostDstTy>
+ Stream &syncCopyDToH(DeviceSrcTy &&DeviceSrc, HostDstTy &&HostDst);
+
+ template <typename DeviceSrcTy, typename HostDstTy>
+ Stream &syncCopyDToH(DeviceSrcTy &&DeviceSrc, HostDstTy &&HostDst,
+ ptrdiff_t ElementCount);
+
+ template <typename HostSrcTy, typename DeviceDstTy>
+ Stream &syncCopyHToD(HostSrcTy &&HostSrc, DeviceDstTy &&DeviceDst);
+
+ template <typename HostSrcTy, typename DeviceDstTy>
+ Stream &syncCopyHToD(HostSrcTy &&HostSrc, DeviceDstTy &DeviceDst,
+ ptrdiff_t ElementCount);
+
+ /// \}
+
+ /// Enqueues an operation in the stream to set the bytes of a given device
+ /// memory region to a given value.
+ ///
+ /// DeviceDstTy must be convertible to DeviceMemorySpan<T> for non-const T.
+ template <typename DeviceDstTy>
+ Stream &asyncMemsetD(DeviceDstTy &&DeviceDst, char ByteValue);
+
+ /// Enqueues a kernel launch operation on this stream.
+ Stream &asyncKernelLaunch(const Kernel &TheKernel,
+ KernelLaunchDimensions LaunchDimensions,
+ Span<void *> Arguments, Span<size_t> ArgumentSizes,
+ size_t SharedMemoryBytes = 0);
+
+ /// Enqueues an event in the stream.
+ Stream &enqueueEvent(Event &E);
+
+ // Allows implicit conversion to (CUstream_st *). This makes triple-chevron
+ // kernel calls look nicer because you can just pass a acxxel::Stream
+ // directly.
+ operator CUstream_st *() {
+ return static_cast<CUstream_st *>(TheHandle.get());
+ }
+
+ /// Gets the current status for the Stream and clears the Stream's status.
+ Status takeStatus() ACXXEL_WARN_UNUSED_RESULT {
+ Status OldStatus = TheStatus;
+ TheStatus = Status();
+ return OldStatus;
+ }
+
+private:
+ // Only a platform can make a stream.
+ friend class Platform;
+ Stream(Platform *APlatform, int DeviceIndex, void *AHandle,
+ HandleDestructor Destructor)
+ : ThePlatform(APlatform), TheDeviceIndex(DeviceIndex),
+ TheHandle(AHandle, Destructor) {}
+
+ const Status &setStatus(const Status &S) {
+ if (S.isError() && !TheStatus.isError()) {
+ TheStatus = S;
+ }
+ return S;
+ }
+
+ Status takeStatusOr(const Status &S) {
+ if (TheStatus.isError()) {
+ Status OldStatus = TheStatus;
+ TheStatus = Status();
+ return OldStatus;
+ }
+ return S;
+ }
+
+ // The platform that created the stream.
+ Platform *ThePlatform;
+
+ // The index of the device on which the stream operates.
+ int TheDeviceIndex;
+
+ // A handle to the platform-specific handle implementation.
+ std::unique_ptr<void, HandleDestructor> TheHandle;
+ Status TheStatus;
+};
+
+/// A user-created event on a device.
+///
+/// This is useful for setting synchronization points in a Stream. The host can
+/// synchronize with a Stream without using events, but that requires all the
+/// work in the Stream to be finished in order for the host to be notified.
+/// Events provide more flexibility by allowing the host to be notified when a
+/// single Event in the Stream is finished, rather than all the work in the
+/// Stream.
+class Event {
+public:
+ Event(const Event &) = delete;
+ Event &operator=(const Event &) = delete;
+ Event(Event &&) noexcept;
+ Event &operator=(Event &&That) noexcept;
+ ~Event() = default;
+
+ /// Checks to see if the event is done running.
+ bool isDone();
+
+ /// Blocks the host until the event is done.
+ Status sync();
+
+ /// Gets the time elapsed between the previous event's execution and this
+ /// event's execution.
+ Expected<float> getSecondsSince(const Event &Previous);
+
+private:
+ // Only a platform can make an event.
+ friend class Platform;
+ Event(Platform *APlatform, void *AHandle, HandleDestructor Destructor)
+ : ThePlatform(APlatform), TheHandle(AHandle, Destructor) {}
+
+ Platform *ThePlatform;
+ std::unique_ptr<void, HandleDestructor> TheHandle;
+};
+
+/// An accelerator platform.
+///
+/// This is the base class for all platforms such as CUDA and OpenCL. It
+/// contains many virtual methods that must be overridden by each platform
+/// implementation.
+///
+/// It also has some template wrapper functions that take care of type checking
+/// and then forward their arguments on to raw virtual functions that are
+/// implemented by each specific platform.
+class Platform {
+public:
+ virtual ~Platform(){};
+
+ /// Gets the number of devices for this platform in this system.
+ virtual Expected<int> getDeviceCount() = 0;
+
+ /// Sets the active device for this platform in this thread.
+ virtual Status setActiveDeviceForThread(int DeviceIndex) = 0;
+
+ /// Gets the currently active device for this platform in this thread.
+ virtual int getActiveDeviceForThread() = 0;
+
+ /// Creates a stream for the platform.
+ ///
+ /// The created Stream is associated with the active device for this thread.
+ virtual Expected<Stream> createStream() = 0;
+
+ /// Creates an event for the platform.
+ ///
+ /// The created Event is associated with the active device for this thread.
+ virtual Expected<Event> createEvent() = 0;
+
+ /// Allocates owned device memory.
+ ///
+ /// \warning This function only allocates space in device memory, it does not
+ /// call the constructor of T.
+ template <typename T>
+ Expected<DeviceMemory<T>> mallocD(ptrdiff_t ElementCount) {
+ Expected<void *> MaybePointer = rawMallocD(ElementCount * sizeof(T));
+ if (MaybePointer.isError())
+ return MaybePointer.getError();
+ return DeviceMemory<T>(this, MaybePointer.getValue(), ElementCount,
+ this->getDeviceMemoryHandleDestructor());
+ }
+
+ /// Creates a DeviceMemorySpan for a device symbol.
+ ///
+ /// This function is present to support __device__ variables in CUDA. Given a
+ /// pointer to a __device__ variable, this function returns a DeviceMemorySpan
+ /// referencing the device memory that stores that __device__ variable.
+ template <typename ElementType>
+ Expected<DeviceMemorySpan<ElementType>> getSymbolMemory(ElementType *Symbol) {
+ Expected<void *> MaybeAddress = rawGetDeviceSymbolAddress(Symbol);
+ if (MaybeAddress.isError())
+ return MaybeAddress.getError();
+ ElementType *Address = static_cast<ElementType *>(MaybeAddress.getValue());
+ Expected<ptrdiff_t> MaybeSize = rawGetDeviceSymbolSize(Symbol);
+ if (MaybeSize.isError())
+ return MaybeSize.getError();
+ ptrdiff_t Size = MaybeSize.getValue();
+ return DeviceMemorySpan<ElementType>(this, Address,
+ Size / sizeof(ElementType), 0);
+ }
+
+ /// \name Host memory registration functions.
+ /// \{
+
+ template <typename T>
+ Expected<AsyncHostMemory<const T>> registerHostMem(Span<const T> Memory) {
+ Status S = rawRegisterHostMem(Memory.data(), Memory.size() * sizeof(T));
+ if (S.isError())
+ return S;
+ return AsyncHostMemory<const T>(
+ Memory.data(), Memory.size(),
+ this->getUnregisterHostMemoryHandleDestructor());
+ }
+
+ template <typename T>
+ Expected<AsyncHostMemory<T>> registerHostMem(Span<T> Memory) {
+ Status S = rawRegisterHostMem(Memory.data(), Memory.size() * sizeof(T));
+ if (S.isError())
+ return S;
+ return AsyncHostMemory<T>(Memory.data(), Memory.size(),
+ this->getUnregisterHostMemoryHandleDestructor());
+ }
+
+ template <typename T, size_t N>
+ Expected<AsyncHostMemory<T>> registerHostMem(T (&Array)[N]) {
+ Span<T> Span(Array);
+ Status S = rawRegisterHostMem(Span.data(), Span.size() * sizeof(T));
+ if (S.isError())
+ return S;
+ return AsyncHostMemory<T>(Span.data(), Span.size(),
+ this->getUnregisterHostMemoryHandleDestructor());
+ }
+
+ /// Registers memory stored in a container with a data() member function and
+ /// which can be converted to a Span<T*>.
+ template <typename Container>
+ auto registerHostMem(Container &Cont) -> Expected<AsyncHostMemory<
+ typename std::remove_reference<decltype(*Cont.data())>::type>> {
+ using ValueType =
+ typename std::remove_reference<decltype(*Cont.data())>::type;
+ Span<ValueType> Span(Cont);
+ Status S = rawRegisterHostMem(Span.data(), Span.size() * sizeof(ValueType));
+ if (S.isError())
+ return S;
+ return AsyncHostMemory<ValueType>(
+ Span.data(), Span.size(),
+ this->getUnregisterHostMemoryHandleDestructor());
+ }
+
+ /// Allocates an owned, registered array of objects on the host.
+ ///
+ /// Default constructs each element in the resulting array.
+ template <typename T>
+ Expected<OwnedAsyncHostMemory<T>> newAsyncHostMem(ptrdiff_t ElementCount) {
+ Expected<void *> MaybeMemory =
+ rawMallocRegisteredH(ElementCount * sizeof(T));
+ if (MaybeMemory.isError())
+ return MaybeMemory.getError();
+ T *Memory = static_cast<T *>(MaybeMemory.getValue());
+ for (ptrdiff_t I = 0; I < ElementCount; ++I)
+ new (Memory + I) T;
+ return OwnedAsyncHostMemory<T>(Memory, ElementCount,
+ this->getFreeHostMemoryHandleDestructor());
+ }
+
+ /// \}
+
+ virtual Expected<Program>
+ createProgramFromSource(Span<const char> Source) = 0;
+
+protected:
+ friend class Stream;
+ friend class Event;
+ friend class Program;
+ template <typename T> friend class DeviceMemorySpan;
+
+ void *getStreamHandle(Stream &Stream) { return Stream.TheHandle.get(); }
+ void *getEventHandle(Event &Event) { return Event.TheHandle.get(); }
+
+ // Pass along access to Stream constructor to subclasses.
+ Stream constructStream(Platform *APlatform, void *AHandle,
+ HandleDestructor Destructor) {
+ return Stream(APlatform, getActiveDeviceForThread(), AHandle, Destructor);
+ }
+
+ // Pass along access to Event constructor to subclasses.
+ Event constructEvent(Platform *APlatform, void *AHandle,
+ HandleDestructor Destructor) {
+ return Event(APlatform, AHandle, Destructor);
+ }
+
+ // Pass along access to Program constructor to subclasses.
+ Program constructProgram(Platform *APlatform, void *AHandle,
+ HandleDestructor Destructor) {
+ return Program(APlatform, AHandle, Destructor);
+ }
+
+ virtual Status streamSync(void *Stream) = 0;
+ virtual Status streamWaitOnEvent(void *Stream, void *Event) = 0;
+
+ virtual Status enqueueEvent(void *Event, void *Stream) = 0;
+ virtual bool eventIsDone(void *Event) = 0;
+ virtual Status eventSync(void *Event) = 0;
+ virtual Expected<float> getSecondsBetweenEvents(void *StartEvent,
+ void *EndEvent) = 0;
+
+ virtual Expected<void *> rawMallocD(ptrdiff_t ByteCount) = 0;
+ virtual HandleDestructor getDeviceMemoryHandleDestructor() = 0;
+ virtual void *getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize,
+ size_t ByteOffset) = 0;
+ virtual void rawDestroyDeviceMemorySpanHandle(void *Handle) = 0;
+
+ virtual Expected<void *> rawGetDeviceSymbolAddress(const void *Symbol) = 0;
+ virtual Expected<ptrdiff_t> rawGetDeviceSymbolSize(const void *Symbol) = 0;
+
+ virtual Status rawCopyDToD(const void *DeviceSrc,
+ ptrdiff_t DeviceSrcByteOffset, void *DeviceDst,
+ ptrdiff_t DeviceDstByteOffset,
+ ptrdiff_t ByteCount) = 0;
+ virtual Status rawCopyDToH(const void *DeviceSrc,
+ ptrdiff_t DeviceSrcByteOffset, void *HostDst,
+ ptrdiff_t ByteCount) = 0;
+ virtual Status rawCopyHToD(const void *HostSrc, void *DeviceDst,
+ ptrdiff_t DeviceDstByteOffset,
+ ptrdiff_t ByteCount) = 0;
+
+ virtual Status rawMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
+ ptrdiff_t ByteCount, char ByteValue) = 0;
+
+ virtual Status rawRegisterHostMem(const void *Memory,
+ ptrdiff_t ByteCount) = 0;
+ virtual HandleDestructor getUnregisterHostMemoryHandleDestructor() = 0;
+
+ virtual Expected<void *> rawMallocRegisteredH(ptrdiff_t ByteCount) = 0;
+ virtual HandleDestructor getFreeHostMemoryHandleDestructor() = 0;
+
+ virtual Status asyncCopyDToD(const void *DeviceSrc,
+ ptrdiff_t DeviceSrcByteOffset, void *DeviceDst,
+ ptrdiff_t DeviceDstByteOffset,
+ ptrdiff_t ByteCount, void *Stream) = 0;
+ virtual Status asyncCopyDToH(const void *DeviceSrc,
+ ptrdiff_t DeviceSrcByteOffset, void *HostDst,
+ ptrdiff_t ByteCount, void *Stream) = 0;
+ virtual Status asyncCopyHToD(const void *HostSrc, void *DeviceDst,
+ ptrdiff_t DeviceDstByteOffset,
+ ptrdiff_t ByteCount, void *Stream) = 0;
+
+ virtual Status asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
+ ptrdiff_t ByteCount, char ByteValue,
+ void *Stream) = 0;
+
+ virtual Status addStreamCallback(Stream &Stream, StreamCallback Callback) = 0;
+
+ virtual Expected<void *> rawCreateKernel(void *Program,
+ const std::string &Name) = 0;
+ virtual HandleDestructor getKernelHandleDestructor() = 0;
+
+ virtual Status rawEnqueueKernelLaunch(void *Stream, void *Kernel,
+ KernelLaunchDimensions LaunchDimensions,
+ Span<void *> Arguments,
+ Span<size_t> ArgumentSizes,
+ size_t SharedMemoryBytes) = 0;
+};
+
+// Implementation of templated Stream functions.
+
+template <typename DeviceSrcTy, typename DeviceDstTy>
+Stream &Stream::asyncCopyDToD(DeviceSrcTy &&DeviceSrc,
+ DeviceDstTy &&DeviceDst) {
+ using SrcElementTy =
+ typename std::remove_reference<DeviceSrcTy>::type::value_type;
+ using DstElementTy =
+ typename std::remove_reference<DeviceDstTy>::type::value_type;
+ static_assert(std::is_same<SrcElementTy, DstElementTy>::value,
+ "asyncCopyDToD cannot copy between arrays of different types");
+ DeviceMemorySpan<const SrcElementTy> DeviceSrcSpan(DeviceSrc);
+ DeviceMemorySpan<DstElementTy> DeviceDstSpan(DeviceDst);
+ if (DeviceSrcSpan.size() != DeviceDstSpan.size()) {
+ setStatus(Status("asyncCopyDToD source element count " +
+ std::to_string(DeviceSrcSpan.size()) +
+ " does not equal destination element count " +
+ std::to_string(DeviceDstSpan.size())));
+ return *this;
+ }
+ setStatus(ThePlatform->asyncCopyDToD(
+ DeviceSrcSpan.baseHandle(), DeviceSrcSpan.byte_offset(),
+ DeviceDstSpan.baseHandle(), DeviceDstSpan.byte_offset(),
+ DeviceSrcSpan.byte_size(), TheHandle.get()));
+ return *this;
+}
+
+template <typename DeviceSrcTy, typename DeviceDstTy>
+Stream &Stream::asyncCopyDToD(DeviceSrcTy &&DeviceSrc, DeviceDstTy &&DeviceDst,
+ ptrdiff_t ElementCount) {
+ using SrcElementTy =
+ typename std::remove_reference<DeviceSrcTy>::type::value_type;
+ using DstElementTy =
+ typename std::remove_reference<DeviceDstTy>::type::value_type;
+ static_assert(std::is_same<SrcElementTy, DstElementTy>::value,
+ "asyncCopyDToD cannot copy between arrays of different types");
+ DeviceMemorySpan<const SrcElementTy> DeviceSrcSpan(DeviceSrc);
+ DeviceMemorySpan<DstElementTy> DeviceDstSpan(DeviceDst);
+ if (DeviceSrcSpan.size() < ElementCount) {
+ setStatus(Status("asyncCopyDToD source element count " +
+ std::to_string(DeviceSrcSpan.size()) +
+ " is less than requested element count " +
+ std::to_string(ElementCount)));
+ return *this;
+ }
+ if (DeviceDstSpan.size() < ElementCount) {
+ setStatus(Status("asyncCopyDToD destination element count " +
+ std::to_string(DeviceDst.size()) +
+ " is less than requested element count " +
+ std::to_string(ElementCount)));
+ return *this;
+ }
+ setStatus(ThePlatform->asyncCopyDToD(
+ DeviceSrcSpan.baseHandle(), DeviceSrcSpan.byte_offset(),
+ DeviceDstSpan.baseHandle(), DeviceDstSpan.byte_offset(),
+ ElementCount * sizeof(SrcElementTy), TheHandle.get()));
+ return *this;
+}
+
+template <typename DeviceSrcTy, typename HostDstTy>
+Stream &Stream::asyncCopyDToH(DeviceSrcTy &&DeviceSrc, HostDstTy &&HostDst) {
+ using SrcElementTy =
+ typename std::remove_reference<DeviceSrcTy>::type::value_type;
+ DeviceMemorySpan<const SrcElementTy> DeviceSrcSpan(DeviceSrc);
+ AsyncHostMemorySpan<SrcElementTy> HostDstSpan(HostDst);
+ if (DeviceSrcSpan.size() != HostDstSpan.size()) {
+ setStatus(Status("asyncCopyDToH source element count " +
+ std::to_string(DeviceSrcSpan.size()) +
+ " does not equal destination element count " +
+ std::to_string(HostDstSpan.size())));
+ return *this;
+ }
+ setStatus(ThePlatform->asyncCopyDToH(
+ DeviceSrcSpan.baseHandle(), DeviceSrcSpan.byte_offset(),
+ HostDstSpan.data(), DeviceSrcSpan.byte_size(), TheHandle.get()));
+ return *this;
+}
+
+template <typename DeviceSrcTy, typename HostDstTy>
+Stream &Stream::asyncCopyDToH(DeviceSrcTy &&DeviceSrc, HostDstTy &&HostDst,
+ ptrdiff_t ElementCount) {
+ using SrcElementTy =
+ typename std::remove_reference<DeviceSrcTy>::type::value_type;
+ DeviceMemorySpan<const SrcElementTy> DeviceSrcSpan(DeviceSrc);
+ AsyncHostMemorySpan<SrcElementTy> HostDstSpan(HostDst);
+ if (DeviceSrcSpan.size() < ElementCount) {
+ setStatus(Status("asyncCopyDToH source element count " +
+ std::to_string(DeviceSrcSpan.size()) +
+ " is less than requested element count " +
+ std::to_string(ElementCount)));
+ return *this;
+ }
+ if (HostDstSpan.size() < ElementCount) {
+ setStatus(Status("asyncCopyDToH destination element count " +
+ std::to_string(HostDstSpan.size()) +
+ " is less than requested element count " +
+ std::to_string(ElementCount)));
+ return *this;
+ }
+ setStatus(ThePlatform->asyncCopyDToH(
+ DeviceSrcSpan.baseHandle(), DeviceSrcSpan.byte_offset(),
+ HostDstSpan.data(), ElementCount * sizeof(SrcElementTy),
+ TheHandle.get()));
+ return *this;
+}
+
+template <typename HostSrcTy, typename DeviceDstTy>
+Stream &Stream::asyncCopyHToD(HostSrcTy &&HostSrc, DeviceDstTy &&DeviceDst) {
+ using DstElementTy =
+ typename std::remove_reference<DeviceDstTy>::type::value_type;
+ AsyncHostMemorySpan<const DstElementTy> HostSrcSpan(HostSrc);
+ DeviceMemorySpan<DstElementTy> DeviceDstSpan(DeviceDst);
+ if (HostSrcSpan.size() != DeviceDstSpan.size()) {
+ setStatus(Status("asyncCopyHToD source element count " +
+ std::to_string(HostSrcSpan.size()) +
+ " does not equal destination element count " +
+ std::to_string(DeviceDstSpan.size())));
+ return *this;
+ }
+ setStatus(ThePlatform->asyncCopyHToD(
+ HostSrcSpan.data(), DeviceDstSpan.baseHandle(),
+ DeviceDstSpan.byte_offset(), HostSrcSpan.byte_size(), TheHandle.get()));
+ return *this;
+}
+
+template <typename HostSrcTy, typename DeviceDstTy>
+Stream &Stream::asyncCopyHToD(HostSrcTy &&HostSrc, DeviceDstTy &DeviceDst,
+ ptrdiff_t ElementCount) {
+ using DstElementTy =
+ typename std::remove_reference<DeviceDstTy>::type::value_type;
+ AsyncHostMemorySpan<const DstElementTy> HostSrcSpan(HostSrc);
+ DeviceMemorySpan<DstElementTy> DeviceDstSpan(DeviceDst);
+ if (HostSrcSpan.size() < ElementCount) {
+ setStatus(Status("copyHToD source element count " +
+ std::to_string(HostSrcSpan.size()) +
+ " is less than requested element count " +
+ std::to_string(ElementCount)));
+ return *this;
+ }
+ if (DeviceDstSpan.size() < ElementCount) {
+ setStatus(Status("copyHToD destination element count " +
+ std::to_string(DeviceDstSpan.size()) +
+ " is less than requested element count " +
+ std::to_string(ElementCount)));
+ return *this;
+ }
+ setStatus(ThePlatform->asyncCopyHToD(
+ HostSrcSpan.data(), DeviceDstSpan.baseHandle(),
+ DeviceDstSpan.byte_offset(), ElementCount * sizeof(DstElementTy),
+ TheHandle.get()));
+ return *this;
+}
+
+template <typename DeviceDstTy>
+Stream &Stream::asyncMemsetD(DeviceDstTy &&DeviceDst, char ByteValue) {
+ using DstElementTy =
+ typename std::remove_reference<DeviceDstTy>::type::value_type;
+ DeviceMemorySpan<DstElementTy> DeviceDstSpan(DeviceDst);
+ setStatus(ThePlatform->asyncMemsetD(
+ DeviceDstSpan.baseHandle(), DeviceDstSpan.byte_offset(),
+ DeviceDstSpan.byte_size(), ByteValue, TheHandle.get()));
+ return *this;
+}
+
+template <typename DeviceSrcTy, typename DeviceDstTy>
+Stream &Stream::syncCopyDToD(DeviceSrcTy &&DeviceSrc, DeviceDstTy &&DeviceDst) {
+ using SrcElementTy =
+ typename std::remove_reference<DeviceSrcTy>::type::value_type;
+ using DstElementTy =
+ typename std::remove_reference<DeviceDstTy>::type::value_type;
+ static_assert(std::is_same<SrcElementTy, DstElementTy>::value,
+ "copyDToD cannot copy between arrays of different types");
+ DeviceMemorySpan<const SrcElementTy> DeviceSrcSpan(DeviceSrc);
+ DeviceMemorySpan<DstElementTy> DeviceDstSpan(DeviceDst);
+ if (DeviceSrcSpan.size() != DeviceDstSpan.size()) {
+ setStatus(Status("copyDToD source element count " +
+ std::to_string(DeviceSrcSpan.size()) +
+ " does not equal destination element count " +
+ std::to_string(DeviceDstSpan.size())));
+ return *this;
+ }
+ if (setStatus(ThePlatform->asyncCopyDToD(
+ DeviceSrcSpan.baseHandle(), DeviceSrcSpan.byte_offset(),
+ DeviceDstSpan.baseHandle(), DeviceDstSpan.byte_offset(),
+ DeviceSrcSpan.byte_size(), TheHandle.get()))
+ .isError()) {
+ return *this;
+ }
+ setStatus(sync());
+ return *this;
+}
+
+template <typename DeviceSrcTy, typename DeviceDstTy>
+Stream &Stream::syncCopyDToD(DeviceSrcTy &&DeviceSrc, DeviceDstTy &&DeviceDst,
+ ptrdiff_t ElementCount) {
+ using SrcElementTy =
+ typename std::remove_reference<DeviceSrcTy>::type::value_type;
+ using DstElementTy =
+ typename std::remove_reference<DeviceDstTy>::type::value_type;
+ static_assert(std::is_same<SrcElementTy, DstElementTy>::value,
+ "copyDToD cannot copy between arrays of different types");
+ DeviceMemorySpan<const SrcElementTy> DeviceSrcSpan(DeviceSrc);
+ DeviceMemorySpan<DstElementTy> DeviceDstSpan(DeviceDst);
+ if (DeviceSrcSpan.size() < ElementCount) {
+ setStatus(Status("copyDToD source element count " +
+ std::to_string(DeviceSrcSpan.size()) +
+ " is less than requested element count " +
+ std::to_string(ElementCount)));
+ return *this;
+ }
+ if (DeviceDstSpan.size() < ElementCount) {
+ setStatus(Status("copyDToD destination element count " +
+ std::to_string(DeviceDst.size()) +
+ " is less than requested element count " +
+ std::to_string(ElementCount)));
+ return *this;
+ }
+ if (setStatus(ThePlatform->asyncCopyDToD(
+ DeviceSrcSpan.baseHandle(), DeviceSrcSpan.byte_offset(),
+ DeviceDstSpan.baseHandle(), DeviceDstSpan.byte_offset(),
+ ElementCount * sizeof(SrcElementTy), TheHandle.get()))
+ .isError()) {
+ return *this;
+ }
+ setStatus(sync());
+ return *this;
+}
+
+template <typename DeviceSrcTy, typename HostDstTy>
+Stream &Stream::syncCopyDToH(DeviceSrcTy &&DeviceSrc, HostDstTy &&HostDst) {
+ using SrcElementTy =
+ typename std::remove_reference<DeviceSrcTy>::type::value_type;
+ DeviceMemorySpan<const SrcElementTy> DeviceSrcSpan(DeviceSrc);
+ Span<SrcElementTy> HostDstSpan(HostDst);
+ if (DeviceSrcSpan.size() != HostDstSpan.size()) {
+ setStatus(Status("copyDToH source element count " +
+ std::to_string(DeviceSrcSpan.size()) +
+ " does not equal destination element count " +
+ std::to_string(HostDstSpan.size())));
+ return *this;
+ }
+ if (setStatus(ThePlatform->asyncCopyDToH(
+ DeviceSrcSpan.baseHandle(), DeviceSrcSpan.byte_offset(),
+ HostDstSpan.data(), DeviceSrcSpan.byte_size(),
+ TheHandle.get()))
+ .isError()) {
+ return *this;
+ }
+ setStatus(sync());
+ return *this;
+}
+
+template <typename DeviceSrcTy, typename HostDstTy>
+Stream &Stream::syncCopyDToH(DeviceSrcTy &&DeviceSrc, HostDstTy &&HostDst,
+ ptrdiff_t ElementCount) {
+ using SrcElementTy =
+ typename std::remove_reference<DeviceSrcTy>::type::value_type;
+ DeviceMemorySpan<const SrcElementTy> DeviceSrcSpan(DeviceSrc);
+ Span<SrcElementTy> HostDstSpan(HostDst);
+ if (DeviceSrcSpan.size() < ElementCount) {
+ setStatus(Status("copyDToH source element count " +
+ std::to_string(DeviceSrcSpan.size()) +
+ " is less than requested element count " +
+ std::to_string(ElementCount)));
+ return *this;
+ }
+ if (HostDstSpan.size() < ElementCount) {
+ setStatus(Status("copyDToH destination element count " +
+ std::to_string(HostDstSpan.size()) +
+ " is less than requested element count " +
+ std::to_string(ElementCount)));
+ return *this;
+ }
+ if (setStatus(ThePlatform->asyncCopyDToH(
+ DeviceSrcSpan.baseHandle(), DeviceSrcSpan.byte_offset(),
+ HostDstSpan.data(), ElementCount * sizeof(SrcElementTy),
+ TheHandle.get()))
+ .isError()) {
+ return *this;
+ }
+ setStatus(sync());
+ return *this;
+}
+
+template <typename HostSrcTy, typename DeviceDstTy>
+Stream &Stream::syncCopyHToD(HostSrcTy &&HostSrc, DeviceDstTy &&DeviceDst) {
+ using DstElementTy =
+ typename std::remove_reference<DeviceDstTy>::type::value_type;
+ Span<const DstElementTy> HostSrcSpan(HostSrc);
+ DeviceMemorySpan<DstElementTy> DeviceDstSpan(DeviceDst);
+ if (HostSrcSpan.size() != DeviceDstSpan.size()) {
+ setStatus(Status("copyHToD source element count " +
+ std::to_string(HostSrcSpan.size()) +
+ " does not equal destination element count " +
+ std::to_string(DeviceDstSpan.size())));
+ return *this;
+ }
+ if (setStatus(ThePlatform->asyncCopyHToD(
+ HostSrcSpan.data(), DeviceDstSpan.baseHandle(),
+ DeviceDstSpan.byte_offset(), DeviceDstSpan.byte_size(),
+ TheHandle.get()))
+ .isError()) {
+ return *this;
+ }
+ setStatus(sync());
+ return *this;
+}
+
+template <typename HostSrcTy, typename DeviceDstTy>
+Stream &Stream::syncCopyHToD(HostSrcTy &&HostSrc, DeviceDstTy &DeviceDst,
+ ptrdiff_t ElementCount) {
+ using DstElementTy =
+ typename std::remove_reference<DeviceDstTy>::type::value_type;
+ Span<const DstElementTy> HostSrcSpan(HostSrc);
+ DeviceMemorySpan<DstElementTy> DeviceDstSpan(DeviceDst);
+ if (HostSrcSpan.size() < ElementCount) {
+ setStatus(Status("copyHToD source element count " +
+ std::to_string(HostSrcSpan.size()) +
+ " is less than requested element count " +
+ std::to_string(ElementCount)));
+ return *this;
+ }
+ if (DeviceDstSpan.size() < ElementCount) {
+ setStatus(Status("copyHToD destination element count " +
+ std::to_string(DeviceDstSpan.size()) +
+ " is less than requested element count " +
+ std::to_string(ElementCount)));
+ return *this;
+ }
+ if (setStatus(ThePlatform->asyncCopyHToD(
+ HostSrcSpan.data(), DeviceDstSpan.baseHandle(),
+ DeviceDstSpan.byte_offset(),
+ ElementCount * sizeof(DstElementTy), TheHandle.get()))
+ .isError()) {
+ return *this;
+ }
+ setStatus(sync());
+ return *this;
+}
+
+/// Owned device memory.
+///
+/// Device memory that frees itself when it goes out of scope.
+template <typename ElementType> class DeviceMemory {
+public:
+ using element_type = ElementType;
+ using index_type = std::ptrdiff_t;
+ using value_type = typename std::remove_const<element_type>::type;
+
+ DeviceMemory(const DeviceMemory &) = delete;
+ DeviceMemory &operator=(const DeviceMemory &) = delete;
+ DeviceMemory(DeviceMemory &&) noexcept;
+ DeviceMemory &operator=(DeviceMemory &&) noexcept;
+ ~DeviceMemory() = default;
+
+ /// Gets the raw base handle for the underlying platform implementation.
+ void *handle() const { return ThePointer.get(); }
+
+ index_type length() const { return TheSize; }
+ index_type size() const { return TheSize; }
+ index_type byte_size() const { // NOLINT
+ return TheSize * sizeof(element_type);
+ }
+ bool empty() const { return TheSize == 0; }
+
+ // These conversion operators are useful for making triple-chevron kernel
+ // launches more concise.
+ operator element_type *() {
+ return static_cast<element_type *>(ThePointer.get());
+ }
+ operator const element_type *() const { return ThePointer.get(); }
+
+ /// Converts a const object to a DeviceMemorySpan of const elements.
+ DeviceMemorySpan<const element_type> asSpan() const {
+ return DeviceMemorySpan<const element_type>(
+ ThePlatform, static_cast<const element_type *>(ThePointer.get()),
+ TheSize, 0);
+ }
+
+ /// Converts an object to a DeviceMemorySpan.
+ DeviceMemorySpan<element_type> asSpan() {
+ return DeviceMemorySpan<element_type>(
+ ThePlatform, static_cast<element_type *>(ThePointer.get()), TheSize, 0);
+ }
+
+private:
+ friend class Platform;
+ template <typename T> friend class DeviceMemorySpan;
+
+ DeviceMemory(Platform *ThePlatform, void *Pointer, index_type ElementCount,
+ HandleDestructor Destructor)
+ : ThePlatform(ThePlatform), ThePointer(Pointer, Destructor),
+ TheSize(ElementCount) {}
+
+ Platform *ThePlatform;
+ std::unique_ptr<void, HandleDestructor> ThePointer;
+ ptrdiff_t TheSize;
+};
+
+template <typename T>
+DeviceMemory<T>::DeviceMemory(DeviceMemory &&) noexcept = default;
+template <typename T>
+DeviceMemory<T> &DeviceMemory<T>::operator=(DeviceMemory &&) noexcept = default;
+
+/// View into device memory.
+///
+/// Like a Span, but for device memory rather than host memory.
+template <typename ElementType> class DeviceMemorySpan {
+public:
+ /// \name constants and types
+ /// \{
+ using element_type = ElementType;
+ using index_type = std::ptrdiff_t;
+ using pointer = element_type *;
+ using reference = element_type &;
+ using iterator = element_type *;
+ using const_iterator = const element_type *;
+ using value_type = typename std::remove_const<element_type>::type;
+ /// \}
+
+ DeviceMemorySpan()
+ : ThePlatform(nullptr), TheHandle(nullptr), TheSize(0), TheOffset(0),
+ TheSpanHandle(nullptr) {}
+
+ // Intentionally implicit.
+ template <typename OtherElementType>
+ DeviceMemorySpan(DeviceMemorySpan<OtherElementType> &ASpan)
+ : ThePlatform(ASpan.ThePlatform),
+ TheHandle(static_cast<pointer>(ASpan.baseHandle())),
+ TheSize(ASpan.size()), TheOffset(ASpan.offset()),
+ TheSpanHandle(nullptr) {}
+
+ // Intentionally implicit.
+ template <typename OtherElementType>
+ DeviceMemorySpan(DeviceMemorySpan<OtherElementType> &&ASpan)
+ : ThePlatform(ASpan.ThePlatform),
+ TheHandle(static_cast<pointer>(ASpan.baseHandle())),
+ TheSize(ASpan.size()), TheOffset(ASpan.offset()),
+ TheSpanHandle(nullptr) {}
+
+ // Intentionally implicit.
+ template <typename OtherElementType>
+ DeviceMemorySpan(DeviceMemory<OtherElementType> &Memory)
+ : ThePlatform(Memory.ThePlatform),
+ TheHandle(static_cast<value_type *>(Memory.handle())),
+ TheSize(Memory.size()), TheOffset(0), TheSpanHandle(nullptr) {}
+
+ ~DeviceMemorySpan() {
+ if (TheSpanHandle) {
+ ThePlatform->rawDestroyDeviceMemorySpanHandle(
+ const_cast<value_type *>(TheSpanHandle));
+ }
+ }
+
+ /// \name observers
+ /// \{
+ index_type length() const { return TheSize; }
+ index_type size() const { return TheSize; }
+ index_type byte_size() const { // NOLINT
+ return TheSize * sizeof(element_type);
+ }
+ index_type offset() const { return TheOffset; }
+ index_type byte_offset() const { // NOLINT
+ return TheOffset * sizeof(element_type);
+ }
+ bool empty() const { return TheSize == 0; }
+ /// \}
+
+ void *baseHandle() const {
+ return static_cast<void *>(const_cast<value_type *>(TheHandle));
+ }
+
+ /// Casts to a host memory pointer.
+ ///
+ /// This is only guaranteed to make sense for the CUDA platform, where device
+ /// pointers can be stored and manipulated much like host pointers. This makes
+ /// it easy to do triple-chevron kernel launches in CUDA because
+ /// DeviceMemorySpan values can be passed to parameters expecting regular
+ /// pointers.
+ ///
+ /// If the CUDA platform is using unified memory, it may also be possible to
+ /// dereference this pointer on the host.
+ ///
+ /// For platforms other than CUDA, this may return a garbage pointer.
+ operator element_type *() const {
+ if (!TheSpanHandle)
+ TheSpanHandle = ThePlatform->getDeviceMemorySpanHandle(
+ TheHandle, TheSize * sizeof(element_type),
+ TheOffset * sizeof(element_type));
+ return TheSpanHandle;
+ }
+
+ DeviceMemorySpan<element_type> first(index_type Count) const {
+ bool Valid = Count >= 0 && Count <= TheSize;
+ if (!Valid)
+ std::terminate();
+ return DeviceMemorySpan<element_type>(ThePlatform, TheHandle, Count,
+ TheOffset);
+ }
+
+ DeviceMemorySpan<element_type> last(index_type Count) const {
+ bool Valid = Count >= 0 && Count <= TheSize;
+ if (!Valid)
+ std::terminate();
+ return DeviceMemorySpan<element_type>(ThePlatform, TheHandle, Count,
+ TheOffset + TheSize - Count);
+ }
+
+ DeviceMemorySpan<element_type>
+ subspan(index_type Offset, index_type Count = dynamic_extent) const {
+ bool Valid =
+ (Offset == 0 || (Offset > 0 && Offset <= TheSize)) &&
+ (Count == dynamic_extent || (Count >= 0 && Offset + Count <= TheSize));
+ if (!Valid)
+ std::terminate();
+ return DeviceMemorySpan<element_type>(ThePlatform, TheHandle, Count,
+ TheOffset + Offset);
+ }
+
+private:
+ template <typename T> friend class DeviceMemory;
+ template <typename T> friend class DeviceMemorySpan;
+ friend class Platform;
+
+ DeviceMemorySpan(Platform *ThePlatform, pointer AHandle, index_type Size,
+ index_type Offset)
+ : ThePlatform(ThePlatform), TheHandle(AHandle), TheSize(Size),
+ TheOffset(Offset), TheSpanHandle(nullptr) {}
+
+ Platform *ThePlatform;
+ pointer TheHandle;
+ index_type TheSize;
+ index_type TheOffset;
+ pointer TheSpanHandle;
+};
+
+/// Asynchronous host memory.
+///
+/// This memory is pinned or otherwise registered in the host memory space to
+/// allow for asynchronous copies between it and device memory.
+///
+/// This memory unpins/unregisters itself when it goes out of scope, but does
+/// not free itself.
+template <typename ElementType> class AsyncHostMemory {
+public:
+ using value_type = ElementType;
+ using remove_const_type = typename std::remove_const<ElementType>::type;
+
+ AsyncHostMemory(const AsyncHostMemory &) = delete;
+ AsyncHostMemory &operator=(const AsyncHostMemory &) = delete;
+ AsyncHostMemory(AsyncHostMemory &&) noexcept;
+ AsyncHostMemory &operator=(AsyncHostMemory &&) noexcept;
+ ~AsyncHostMemory() = default;
+
+ template <typename OtherElementType>
+ AsyncHostMemory(AsyncHostMemory<OtherElementType> &&Other)
+ : ThePointer(std::move(Other.ThePointer)),
+ TheElementCount(Other.TheElementCount) {
+ static_assert(
+ std::is_assignable<ElementType *, OtherElementType *>::value,
+ "cannot assign OtherElementType pointer to ElementType pointer type");
+ }
+
+ ElementType *data() const {
+ return const_cast<ElementType *>(
+ static_cast<remove_const_type *>(ThePointer.get()));
+ }
+ ptrdiff_t size() const { return TheElementCount; }
+
+private:
+ template <typename U> friend class AsyncHostMemory;
+ friend class Platform;
+ AsyncHostMemory(ElementType *Pointer, ptrdiff_t ElementCount,
+ HandleDestructor Destructor)
+ : ThePointer(
+ static_cast<void *>(const_cast<remove_const_type *>(Pointer)),
+ Destructor),
+ TheElementCount(ElementCount) {}
+
+ std::unique_ptr<void, HandleDestructor> ThePointer;
+ ptrdiff_t TheElementCount;
+};
+
+template <typename T>
+AsyncHostMemory<T>::AsyncHostMemory(AsyncHostMemory &&) noexcept = default;
+template <typename T>
+AsyncHostMemory<T> &AsyncHostMemory<T>::
+operator=(AsyncHostMemory &&) noexcept = default;
+
+/// Owned registered host memory.
+///
+/// Like AsyncHostMemory, but this memory also frees itself in addition to
+/// unpinning/unregistering itself when it goes out of scope.
+template <typename ElementType> class OwnedAsyncHostMemory {
+public:
+ using remove_const_type = typename std::remove_const<ElementType>::type;
+
+ OwnedAsyncHostMemory(const OwnedAsyncHostMemory &) = delete;
+ OwnedAsyncHostMemory &operator=(const OwnedAsyncHostMemory &) = delete;
+ OwnedAsyncHostMemory(OwnedAsyncHostMemory &&) noexcept;
+ OwnedAsyncHostMemory &operator=(OwnedAsyncHostMemory &&) noexcept;
+
+ ~OwnedAsyncHostMemory() {
+ if (ThePointer.get()) {
+ // We use placement new to construct these objects, so we have to call the
+ // destructors explicitly.
+ for (ptrdiff_t I = 0; I < TheElementCount; ++I)
+ static_cast<ElementType *>(ThePointer.get())[I].~ElementType();
+ }
+ }
+
+ ElementType *get() const {
+ return const_cast<ElementType *>(
+ static_cast<remove_const_type *>(ThePointer.get()));
+ }
+
+ ElementType &operator[](ptrdiff_t I) const {
+ assert(I >= 0 && I < TheElementCount);
+ return get()[I];
+ }
+
+private:
+ template <typename T> friend class AsyncHostMemorySpan;
+
+ friend class Platform;
+
+ OwnedAsyncHostMemory(void *Memory, ptrdiff_t ElementCount,
+ HandleDestructor Destructor)
+ : ThePointer(Memory, Destructor), TheElementCount(ElementCount) {}
+
+ std::unique_ptr<void, HandleDestructor> ThePointer;
+ ptrdiff_t TheElementCount;
+};
+
+template <typename T>
+OwnedAsyncHostMemory<T>::OwnedAsyncHostMemory(
+ OwnedAsyncHostMemory &&) noexcept = default;
+template <typename T>
+OwnedAsyncHostMemory<T> &OwnedAsyncHostMemory<T>::
+operator=(OwnedAsyncHostMemory &&) noexcept = default;
+
+/// View into registered host memory.
+///
+/// Like Span but for registered host memory.
+template <typename ElementType> class AsyncHostMemorySpan {
+public:
+ /// \name constants and types
+ /// \{
+ using element_type = ElementType;
+ using index_type = std::ptrdiff_t;
+ using pointer = element_type *;
+ using reference = element_type &;
+ using iterator = element_type *;
+ using const_iterator = const element_type *;
+ using value_type = typename std::remove_const<element_type>::type;
+ /// \}
+
+ AsyncHostMemorySpan() : TheSpan() {}
+
+ // Intentionally implicit.
+ template <typename OtherElementType>
+ AsyncHostMemorySpan(AsyncHostMemory<OtherElementType> &Memory)
+ : TheSpan(Memory.data(), Memory.size()) {}
+
+ // Intentionally implicit.
+ template <typename OtherElementType>
+ AsyncHostMemorySpan(OwnedAsyncHostMemory<OtherElementType> &Owned)
+ : TheSpan(Owned.get(), Owned.TheElementCount) {}
+
+ // Intentionally implicit.
+ template <typename OtherElementType>
+ AsyncHostMemorySpan(AsyncHostMemorySpan<OtherElementType> &ASpan)
+ : TheSpan(ASpan) {}
+
+ // Intentionally implicit.
+ template <typename OtherElementType>
+ AsyncHostMemorySpan(AsyncHostMemorySpan<OtherElementType> &&Span)
+ : TheSpan(Span) {}
+
+ /// \name observers
+ /// \{
+ index_type length() const { return TheSpan.length(); }
+ index_type size() const { return TheSpan.size(); }
+ index_type byte_size() const { // NOLINT
+ return TheSpan.size() * sizeof(element_type);
+ }
+ bool empty() const { return TheSpan.empty(); }
+ /// \}
+
+ pointer data() const noexcept { return TheSpan.data(); }
+ operator element_type *() const { return TheSpan.data(); }
+
+ AsyncHostMemorySpan<element_type> first(index_type Count) const {
+ return AsyncHostMemorySpan<element_type>(TheSpan.first(Count));
+ }
+
+ AsyncHostMemorySpan<element_type> last(index_type Count) const {
+ return AsyncHostMemorySpan<element_type>(TheSpan.last(Count));
+ }
+
+ AsyncHostMemorySpan<element_type>
+ subspan(index_type Offset, index_type Count = dynamic_extent) const {
+ return AsyncHostMemorySpan<element_type>(TheSpan.subspan(Offset, Count));
+ }
+
+private:
+ template <typename T> friend class AsyncHostMemory;
+
+ explicit AsyncHostMemorySpan(Span<ElementType> ArraySpan)
+ : TheSpan(ArraySpan) {}
+
+ Span<ElementType> TheSpan;
+};
+
+} // namespace acxxel
+
+#endif // ACXXEL_ACXXEL_H
--- /dev/null
+//===--- config.h - Macros generated during configuration -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file declares macros that are generated during the configuration stage
+/// of the build.
+///
+//===----------------------------------------------------------------------===//
+
+#cmakedefine ACXXEL_ENABLE_CUDA
+#cmakedefine ACXXEL_ENABLE_OPENCL
--- /dev/null
+//===--- cuda_acxxel.cpp - CUDA implementation of the Acxxel API ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file defines the standard CUDA implementation of the Acxxel API.
+///
+//===----------------------------------------------------------------------===//
+
+#include "acxxel.h"
+
+#include "cuda.h"
+#include "cuda_runtime.h"
+
+#include <array>
+#include <cassert>
+#include <sstream>
+#include <vector>
+
+namespace acxxel {
+
+namespace {
+
+/// Index of active device for this thread.
+thread_local int ActiveDeviceIndex = 0;
+
+static std::string getCUErrorMessage(CUresult Result) {
+ if (!Result)
+ return "success";
+ const char *ErrorName = "UNKNOWN_ERROR_NAME";
+ const char *ErrorDescription = "UNKNOWN_ERROR_DESCRIPTION";
+ cuGetErrorName(Result, &ErrorName);
+ cuGetErrorString(Result, &ErrorDescription);
+ std::ostringstream OutStream;
+ OutStream << "CUDA driver error: code = " << Result
+ << ", name = " << ErrorName
+ << ", description = " << ErrorDescription;
+ return OutStream.str();
+}
+
+static Status getCUError(CUresult Result, const std::string &Message) {
+ if (!Result)
+ return Status();
+ std::ostringstream OutStream;
+ OutStream << getCUErrorMessage(Result) << ", message = " << Message;
+ return Status(OutStream.str());
+}
+
+static std::string getCUDAErrorMessage(cudaError_t E) {
+ if (!E)
+ return "success";
+ std::ostringstream OutStream;
+ OutStream << "CUDA runtime error: code = " << E
+ << ", name = " << cudaGetErrorName(E)
+ << ", description = " << cudaGetErrorString(E);
+ return OutStream.str();
+}
+
+static Status getCUDAError(cudaError_t E, const std::string &Message) {
+ if (!E)
+ return Status();
+ std::ostringstream OutStream;
+ OutStream << getCUDAErrorMessage(E) << ", message = " << Message;
+ return Status(OutStream.str());
+}
+
+static void logCUWarning(CUresult Result, const std::string &Message) {
+ if (Result) {
+ std::ostringstream OutStream;
+ OutStream << Message << ": " << getCUErrorMessage(Result);
+ logWarning(OutStream.str());
+ }
+}
+
+/// A CUDA Platform implementation.
+class CUDAPlatform : public Platform {
+public:
+ ~CUDAPlatform() override = default;
+
+ static Expected<CUDAPlatform> create();
+
+ Expected<int> getDeviceCount() override;
+
+ Status setActiveDeviceForThread(int DeviceIndex) override;
+
+ int getActiveDeviceForThread() override;
+
+ Expected<Stream> createStream() override;
+
+ Status streamSync(void *Stream) override;
+
+ Status streamWaitOnEvent(void *Stream, void *Event) override;
+
+ Expected<Event> createEvent() override;
+
+protected:
+ Expected<void *> rawMallocD(ptrdiff_t ByteCount) override;
+ HandleDestructor getDeviceMemoryHandleDestructor() override;
+ void *getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize,
+ size_t ByteOffset) override;
+ virtual void rawDestroyDeviceMemorySpanHandle(void *Handle) override;
+
+ Expected<void *> rawGetDeviceSymbolAddress(const void *Symbol) override;
+ Expected<ptrdiff_t> rawGetDeviceSymbolSize(const void *Symbol) override;
+
+ Status rawCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
+ void *DeviceDst, ptrdiff_t DeviceDstByteOffset,
+ ptrdiff_t ByteCount) override;
+ Status rawCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
+ void *HostDst, ptrdiff_t ByteCount) override;
+ Status rawCopyHToD(const void *HostSrc, void *DeviceDst,
+ ptrdiff_t DeviceDstByteOffset,
+ ptrdiff_t ByteCount) override;
+
+ Status rawMemsetD(void *DeviceDst, ptrdiff_t ByteOffset, ptrdiff_t ByteCount,
+ char ByteValue) override;
+
+ Status rawRegisterHostMem(const void *Memory, ptrdiff_t ByteCount) override;
+ HandleDestructor getUnregisterHostMemoryHandleDestructor() override;
+
+ Expected<void *> rawMallocRegisteredH(ptrdiff_t ByteCount) override;
+ HandleDestructor getFreeHostMemoryHandleDestructor() override;
+
+ Status asyncCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
+ void *DeviceDst, ptrdiff_t DeviceDstByteOffset,
+ ptrdiff_t ByteCount, void *Stream) override;
+ Status asyncCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
+ void *HostDst, ptrdiff_t ByteCount,
+ void *Stream) override;
+ Status asyncCopyHToD(const void *HostSrc, void *DeviceDst,
+ ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount,
+ void *Stream) override;
+
+ Status asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
+ ptrdiff_t ByteCount, char ByteValue,
+ void *Stream) override;
+
+ Status addStreamCallback(Stream &Stream, StreamCallback Callback) override;
+
+ Expected<Program> createProgramFromSource(Span<const char> Source) override;
+
+ Status enqueueEvent(void *Event, void *Stream) override;
+ bool eventIsDone(void *Event) override;
+ Status eventSync(void *Event) override;
+ Expected<float> getSecondsBetweenEvents(void *StartEvent,
+ void *EndEvent) override;
+
+ Expected<void *> rawCreateKernel(void *Program,
+ const std::string &Name) override;
+ HandleDestructor getKernelHandleDestructor() override;
+
+ Status rawEnqueueKernelLaunch(void *Stream, void *Kernel,
+ KernelLaunchDimensions LaunchDimensions,
+ Span<void *> Arguments,
+ Span<size_t> ArgumentSizes,
+ size_t SharedMemoryBytes) override;
+
+private:
+ explicit CUDAPlatform(const std::vector<CUcontext> &Contexts)
+ : TheContexts(Contexts) {}
+
+ // Vector of contexts for each device.
+ std::vector<CUcontext> TheContexts;
+};
+
+Expected<CUDAPlatform> CUDAPlatform::create() {
+ std::vector<CUcontext> Contexts;
+ if (CUresult Result = cuInit(0))
+ return getCUError(Result, "cuInit");
+
+ int DeviceCount = 0;
+ if (CUresult Result = cuDeviceGetCount(&DeviceCount))
+ return getCUError(Result, "cuDeviceGetCount");
+
+ for (int I = 0; I < DeviceCount; ++I) {
+ CUdevice Device;
+ if (CUresult Result = cuDeviceGet(&Device, I))
+ return getCUError(Result, "cuDeviceGet");
+ CUcontext Context;
+ if (CUresult Result = cuDevicePrimaryCtxRetain(&Context, Device))
+ return getCUError(Result, "cuDevicePrimaryCtxRetain");
+ if (CUresult Result = cuCtxSetCurrent(Context))
+ return getCUError(Result, "cuCtxSetCurrent");
+ Contexts.emplace_back(Context);
+ }
+
+ return CUDAPlatform(Contexts);
+}
+
+Status CUDAPlatform::setActiveDeviceForThread(int DeviceIndex) {
+ if (static_cast<size_t>(DeviceIndex) >= TheContexts.size())
+ return Status("invalid device index for SetActiveDevice: " +
+ std::to_string(DeviceIndex));
+ ActiveDeviceIndex = DeviceIndex;
+ return getCUError(cuCtxSetCurrent(TheContexts[DeviceIndex]),
+ "setActiveDeviceForThread cuCtxSetCurrent");
+}
+
+int CUDAPlatform::getActiveDeviceForThread() { return ActiveDeviceIndex; }
+
+Expected<int> CUDAPlatform::getDeviceCount() {
+ int Count = 0;
+ if (CUresult Result = cuDeviceGetCount(&Count))
+ return getCUError(Result, "cuDeviceGetCount");
+ return Count;
+}
+
+static void cudaDestroyStream(void *H) {
+ logCUWarning(cuStreamDestroy(static_cast<CUstream_st *>(H)),
+ "cuStreamDestroy");
+}
+
+Expected<Stream> CUDAPlatform::createStream() {
+ unsigned int Flags = CU_STREAM_DEFAULT;
+ CUstream Handle;
+ if (CUresult Result = cuStreamCreate(&Handle, Flags))
+ return getCUError(Result, "cuStreamCreate");
+ return constructStream(this, Handle, cudaDestroyStream);
+}
+
+Status CUDAPlatform::streamSync(void *Stream) {
+ return getCUError(cuStreamSynchronize(static_cast<CUstream_st *>(Stream)),
+ "cuStreamSynchronize");
+}
+
+Status CUDAPlatform::streamWaitOnEvent(void *Stream, void *Event) {
+ // CUDA docs says flags must be 0.
+ unsigned int Flags = 0u;
+ return getCUError(cuStreamWaitEvent(static_cast<CUstream_st *>(Stream),
+ static_cast<CUevent_st *>(Event), Flags),
+ "cuStreamWaitEvent");
+}
+
+static void cudaDestroyEvent(void *H) {
+ logCUWarning(cuEventDestroy(static_cast<CUevent_st *>(H)), "cuEventDestroy");
+}
+
+Expected<Event> CUDAPlatform::createEvent() {
+ unsigned int Flags = CU_EVENT_DEFAULT;
+ CUevent Handle;
+ if (CUresult Result = cuEventCreate(&Handle, Flags))
+ return getCUError(Result, "cuEventCreate");
+ return constructEvent(this, Handle, cudaDestroyEvent);
+}
+
+Status CUDAPlatform::enqueueEvent(void *Event, void *Stream) {
+ return getCUError(cuEventRecord(static_cast<CUevent_st *>(Event),
+ static_cast<CUstream_st *>(Stream)),
+ "cuEventRecord");
+}
+
+bool CUDAPlatform::eventIsDone(void *Event) {
+ return cuEventQuery(static_cast<CUevent_st *>(Event)) != CUDA_ERROR_NOT_READY;
+}
+
+Status CUDAPlatform::eventSync(void *Event) {
+ return getCUError(cuEventSynchronize(static_cast<CUevent_st *>(Event)),
+ "cuEventSynchronize");
+}
+
+Expected<float> CUDAPlatform::getSecondsBetweenEvents(void *StartEvent,
+ void *EndEvent) {
+ float Milliseconds;
+ if (CUresult Result = cuEventElapsedTime(
+ &Milliseconds, static_cast<CUevent_st *>(StartEvent),
+ static_cast<CUevent_st *>(EndEvent)))
+ return getCUError(Result, "cuEventElapsedTime");
+ return Milliseconds * 1e-6;
+}
+
+Expected<void *> CUDAPlatform::rawMallocD(ptrdiff_t ByteCount) {
+ if (!ByteCount)
+ return nullptr;
+ CUdeviceptr Pointer;
+ if (CUresult Result = cuMemAlloc(&Pointer, ByteCount))
+ return getCUError(Result, "cuMemAlloc");
+ return reinterpret_cast<void *>(Pointer);
+}
+
+static void cudaDestroyDeviceMemory(void *H) {
+ logCUWarning(cuMemFree(reinterpret_cast<CUdeviceptr>(H)), "cuMemFree");
+}
+
+HandleDestructor CUDAPlatform::getDeviceMemoryHandleDestructor() {
+ return cudaDestroyDeviceMemory;
+}
+
+void *CUDAPlatform::getDeviceMemorySpanHandle(void *BaseHandle, size_t,
+ size_t ByteOffset) {
+ return static_cast<char *>(BaseHandle) + ByteOffset;
+}
+
+void CUDAPlatform::rawDestroyDeviceMemorySpanHandle(void *) {
+ // Do nothing for this platform.
+}
+
+Expected<void *> CUDAPlatform::rawGetDeviceSymbolAddress(const void *Symbol) {
+ void *Address;
+ if (cudaError_t Status = cudaGetSymbolAddress(&Address, Symbol))
+ return getCUDAError(Status, "cudaGetSymbolAddress");
+ return Address;
+}
+
+Expected<ptrdiff_t> CUDAPlatform::rawGetDeviceSymbolSize(const void *Symbol) {
+ size_t Size;
+ if (cudaError_t Status = cudaGetSymbolSize(&Size, Symbol))
+ return getCUDAError(Status, "cudaGetSymbolSize");
+ return Size;
+}
+
+static const void *offsetVoidPtr(const void *Ptr, ptrdiff_t ByteOffset) {
+ return static_cast<const void *>(static_cast<const char *>(Ptr) + ByteOffset);
+}
+
+static void *offsetVoidPtr(void *Ptr, ptrdiff_t ByteOffset) {
+ return static_cast<void *>(static_cast<char *>(Ptr) + ByteOffset);
+}
+
+Status CUDAPlatform::rawCopyDToD(const void *DeviceSrc,
+ ptrdiff_t DeviceSrcByteOffset, void *DeviceDst,
+ ptrdiff_t DeviceDstByteOffset,
+ ptrdiff_t ByteCount) {
+ return getCUError(cuMemcpyDtoD(reinterpret_cast<CUdeviceptr>(offsetVoidPtr(
+ DeviceDst, DeviceDstByteOffset)),
+ reinterpret_cast<CUdeviceptr>(offsetVoidPtr(
+ DeviceSrc, DeviceSrcByteOffset)),
+ ByteCount),
+ "cuMemcpyDtoD");
+}
+
+Status CUDAPlatform::rawCopyDToH(const void *DeviceSrc,
+ ptrdiff_t DeviceSrcByteOffset, void *HostDst,
+ ptrdiff_t ByteCount) {
+ return getCUError(
+ cuMemcpyDtoH(HostDst, reinterpret_cast<CUdeviceptr>(
+ offsetVoidPtr(DeviceSrc, DeviceSrcByteOffset)),
+ ByteCount),
+ "cuMemcpyDtoH");
+}
+
+Status CUDAPlatform::rawCopyHToD(const void *HostSrc, void *DeviceDst,
+ ptrdiff_t DeviceDstByteOffset,
+ ptrdiff_t ByteCount) {
+ return getCUError(cuMemcpyHtoD(reinterpret_cast<CUdeviceptr>(offsetVoidPtr(
+ DeviceDst, DeviceDstByteOffset)),
+ HostSrc, ByteCount),
+ "cuMemcpyHtoD");
+}
+
+Status CUDAPlatform::rawMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
+ ptrdiff_t ByteCount, char ByteValue) {
+ return getCUError(cuMemsetD8(reinterpret_cast<CUdeviceptr>(
+ offsetVoidPtr(DeviceDst, ByteOffset)),
+ ByteValue, ByteCount),
+ "cuMemsetD8");
+}
+
+Status CUDAPlatform::rawRegisterHostMem(const void *Memory,
+ ptrdiff_t ByteCount) {
+ unsigned int Flags = 0;
+ return getCUError(
+ cuMemHostRegister(const_cast<void *>(Memory), ByteCount, Flags),
+ "cuMemHostRegiser");
+}
+
+static void cudaUnregisterHostMemoryHandleDestructor(void *H) {
+ logCUWarning(cuMemHostUnregister(H), "cuMemHostUnregister");
+}
+
+HandleDestructor CUDAPlatform::getUnregisterHostMemoryHandleDestructor() {
+ return cudaUnregisterHostMemoryHandleDestructor;
+}
+
+Expected<void *> CUDAPlatform::rawMallocRegisteredH(ptrdiff_t ByteCount) {
+ unsigned int Flags = 0;
+ void *Memory;
+ if (CUresult Result = cuMemHostAlloc(&Memory, ByteCount, Flags))
+ return getCUError(Result, "cuMemHostAlloc");
+ return Memory;
+}
+
+static void cudaFreeHostMemoryHandleDestructor(void *H) {
+ logCUWarning(cuMemFreeHost(H), "cuMemFreeHost");
+}
+
+HandleDestructor CUDAPlatform::getFreeHostMemoryHandleDestructor() {
+ return cudaFreeHostMemoryHandleDestructor;
+}
+
+Status CUDAPlatform::asyncCopyDToD(const void *DeviceSrc,
+ ptrdiff_t DeviceSrcByteOffset,
+ void *DeviceDst,
+ ptrdiff_t DeviceDstByteOffset,
+ ptrdiff_t ByteCount, void *Stream) {
+ return getCUError(
+ cuMemcpyDtoDAsync(reinterpret_cast<CUdeviceptr>(
+ offsetVoidPtr(DeviceDst, DeviceDstByteOffset)),
+ reinterpret_cast<CUdeviceptr>(
+ offsetVoidPtr(DeviceSrc, DeviceSrcByteOffset)),
+ ByteCount, static_cast<CUstream_st *>(Stream)),
+ "cuMemcpyDtoDAsync");
+}
+
+Status CUDAPlatform::asyncCopyDToH(const void *DeviceSrc,
+ ptrdiff_t DeviceSrcByteOffset, void *HostDst,
+ ptrdiff_t ByteCount, void *Stream) {
+ return getCUError(
+ cuMemcpyDtoHAsync(HostDst, reinterpret_cast<CUdeviceptr>(offsetVoidPtr(
+ DeviceSrc, DeviceSrcByteOffset)),
+ ByteCount, static_cast<CUstream_st *>(Stream)),
+ "cuMemcpyDtoHAsync");
+}
+
+Status CUDAPlatform::asyncCopyHToD(const void *HostSrc, void *DeviceDst,
+ ptrdiff_t DeviceDstByteOffset,
+ ptrdiff_t ByteCount, void *Stream) {
+ return getCUError(
+ cuMemcpyHtoDAsync(reinterpret_cast<CUdeviceptr>(
+ offsetVoidPtr(DeviceDst, DeviceDstByteOffset)),
+ HostSrc, ByteCount, static_cast<CUstream_st *>(Stream)),
+ "cuMemcpyHtoDAsync");
+}
+
+Status CUDAPlatform::asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
+ ptrdiff_t ByteCount, char ByteValue,
+ void *Stream) {
+ return getCUError(
+ cuMemsetD8Async(
+ reinterpret_cast<CUdeviceptr>(offsetVoidPtr(DeviceDst, ByteOffset)),
+ ByteValue, ByteCount, static_cast<CUstream_st *>(Stream)),
+ "cuMemsetD8Async");
+}
+
+struct StreamCallbackUserData {
+ StreamCallbackUserData(Stream &Stream, StreamCallback Function)
+ : TheStream(Stream), TheFunction(std::move(Function)) {}
+
+ Stream &TheStream;
+ StreamCallback TheFunction;
+};
+
+static void CUDA_CB cuStreamCallbackShim(CUstream HStream, CUresult Status,
+ void *UserData) {
+ std::unique_ptr<StreamCallbackUserData> Data(
+ static_cast<StreamCallbackUserData *>(UserData));
+ Stream &TheStream = Data->TheStream;
+ assert(static_cast<CUstream_st *>(TheStream) == HStream);
+ Data->TheFunction(TheStream,
+ getCUError(Status, "stream callback error state"));
+}
+
+Status CUDAPlatform::addStreamCallback(Stream &Stream,
+ StreamCallback Callback) {
+ // CUDA docs say flags must always be 0 here.
+ unsigned int Flags = 0u;
+ std::unique_ptr<StreamCallbackUserData> UserData(
+ new StreamCallbackUserData(Stream, std::move(Callback)));
+ return getCUError(cuStreamAddCallback(Stream, cuStreamCallbackShim,
+ UserData.release(), Flags),
+ "cuStreamAddCallback");
+}
+
+static void cudaDestroyProgram(void *H) {
+ logCUWarning(cuModuleUnload(static_cast<CUmod_st *>(H)), "cuModuleUnload");
+}
+
+Expected<Program>
+CUDAPlatform::createProgramFromSource(Span<const char> Source) {
+ CUmodule Module;
+ constexpr int LogBufferSizeBytes = 1024;
+ char InfoLogBuffer[LogBufferSizeBytes];
+ char ErrorLogBuffer[LogBufferSizeBytes];
+ constexpr size_t OptionsCount = 4;
+ std::array<CUjit_option, OptionsCount> OptionNames = {
+ {CU_JIT_INFO_LOG_BUFFER, CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
+ CU_JIT_ERROR_LOG_BUFFER, CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES}};
+ std::array<void *, OptionsCount> OptionValues = {
+ {InfoLogBuffer, const_cast<int *>(&LogBufferSizeBytes), ErrorLogBuffer,
+ const_cast<int *>(&LogBufferSizeBytes)}};
+ if (CUresult Result =
+ cuModuleLoadDataEx(&Module, Source.data(), OptionsCount,
+ OptionNames.data(), OptionValues.data())) {
+ InfoLogBuffer[LogBufferSizeBytes - 1] = '\0';
+ ErrorLogBuffer[LogBufferSizeBytes - 1] = '\0';
+ std::ostringstream OutStream;
+ OutStream << "Error creating program from source: "
+ << getCUErrorMessage(Result)
+ << "\nINFO MESSAGES\n================\n"
+ << InfoLogBuffer << "\nERROR MESSAGES\n==================\n"
+ << ErrorLogBuffer;
+ return Status(OutStream.str());
+ }
+ return constructProgram(this, Module, cudaDestroyProgram);
+}
+
+Expected<void *> CUDAPlatform::rawCreateKernel(void *Program,
+ const std::string &Name) {
+ CUmodule Module = static_cast<CUmodule>(Program);
+ CUfunction Kernel;
+ if (CUresult Result = cuModuleGetFunction(&Kernel, Module, Name.c_str()))
+ return getCUError(Result, "cuModuleGetFunction");
+ return Kernel;
+}
+
+static void cudaDestroyKernel(void *) {
+ // Do nothing.
+}
+
+HandleDestructor CUDAPlatform::getKernelHandleDestructor() {
+ return cudaDestroyKernel;
+}
+
+Status CUDAPlatform::rawEnqueueKernelLaunch(
+ void *Stream, void *Kernel, KernelLaunchDimensions LaunchDimensions,
+ Span<void *> Arguments, Span<size_t>, size_t SharedMemoryBytes) {
+ return getCUError(
+ cuLaunchKernel(static_cast<CUfunction>(Kernel), LaunchDimensions.GridX,
+ LaunchDimensions.GridY, LaunchDimensions.GridZ,
+ LaunchDimensions.BlockX, LaunchDimensions.BlockY,
+ LaunchDimensions.BlockZ, SharedMemoryBytes,
+ static_cast<CUstream>(Stream), Arguments.data(), nullptr),
+ "cuLaunchKernel");
+}
+
+} // namespace
+
+namespace cuda {
+
+/// Gets the CUDAPlatform instance and returns it as an unowned pointer to a
+/// Platform.
+Expected<Platform *> getPlatform() {
+ static auto MaybePlatform = []() -> Expected<CUDAPlatform *> {
+ Expected<CUDAPlatform> CreationResult = CUDAPlatform::create();
+ if (CreationResult.isError())
+ return CreationResult.getError();
+ else
+ return new CUDAPlatform(CreationResult.takeValue());
+ }();
+ return MaybePlatform;
+}
+
+} // namespace cuda
+
+} // namespace acxxel
--- /dev/null
+body {
+ background-color: #e0e0eb;
+}
+
+div.header {
+ margin-left: auto;
+ margin-right: auto;
+ max-width: 60em;
+ padding-left: 2em;
+ padding-right: 2em;
+}
+
+div.contents {
+ margin-left: auto;
+ margin-right: auto;
+ max-width: 60em;
+ background-color: white;
+ padding: 2em;
+ border-radius: 1em;
+}
--- /dev/null
+set(CUDA_HOST_COMPILER gcc)
+set(CUDA_NVCC_FLAGS -std=c++11)
+
+if(ACXXEL_ENABLE_CUDA)
+cuda_add_executable(simple_example simple_example.cu)
+target_link_libraries(simple_example acxxel)
+endif()
+
+if(ACXXEL_ENABLE_OPENCL)
+add_executable(opencl_example opencl_example.cpp)
+target_link_libraries(opencl_example acxxel ${OpenCL_LIBRARIES})
+endif()
--- /dev/null
+//===--- opencl_example.cpp - Example of using Acxxel with OpenCL ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file is an example of using OpenCL with Acxxel.
+///
+//===----------------------------------------------------------------------===//
+
+#include "acxxel.h"
+
+#include <array>
+#include <cstdio>
+#include <cstring>
+
+static const char *SaxpyKernelSource = R"(
+__kernel void saxpyKernel(float A, __global float *X, __global float *Y, int N) {
+ int I = get_global_id(0);
+ if (I < N)
+ X[I] = A * X[I] + Y[I];
+}
+)";
+
+template <size_t N>
+void saxpy(float A, std::array<float, N> &X, const std::array<float, N> &Y) {
+ acxxel::Platform *OpenCL = acxxel::getOpenCLPlatform().getValue();
+ acxxel::Stream Stream = OpenCL->createStream().takeValue();
+ auto DeviceX = OpenCL->mallocD<float>(N).takeValue();
+ auto DeviceY = OpenCL->mallocD<float>(N).takeValue();
+ Stream.syncCopyHToD(X, DeviceX).syncCopyHToD(Y, DeviceY);
+ acxxel::Program Program =
+ OpenCL
+ ->createProgramFromSource(acxxel::Span<const char>(
+ SaxpyKernelSource, std::strlen(SaxpyKernelSource)))
+ .takeValue();
+ acxxel::Kernel Kernel = Program.createKernel("saxpyKernel").takeValue();
+ float *RawX = static_cast<float *>(DeviceX);
+ float *RawY = static_cast<float *>(DeviceY);
+ int IntLength = N;
+ void *Arguments[] = {&A, &RawX, &RawY, &IntLength};
+ size_t ArgumentSizes[] = {sizeof(float), sizeof(float *), sizeof(float *),
+ sizeof(int)};
+ acxxel::Status Status =
+ Stream.asyncKernelLaunch(Kernel, N, Arguments, ArgumentSizes)
+ .syncCopyDToH(DeviceX, X)
+ .sync();
+ if (Status.isError()) {
+ std::fprintf(stderr, "Error during saxpy: %s\n",
+ Status.getMessage().c_str());
+ std::exit(EXIT_FAILURE);
+ }
+}
+
+int main() {
+ float A = 2.f;
+ std::array<float, 3> X = {0.f, 1.f, 2.f};
+ std::array<float, 3> Y = {3.f, 4.f, 5.f};
+ std::array<float, 3> Expected = {3.f, 6.f, 9.f};
+ saxpy(A, X, Y);
+ for (int I = 0; I < 3; ++I)
+ if (X[I] != Expected[I]) {
+ std::fprintf(stderr, "Mismatch at position %d, %f != %f\n", I, X[I],
+ Expected[I]);
+ std::exit(EXIT_FAILURE);
+ }
+}
--- /dev/null
+//===--- simple_example.cu - Simple example of using Acxxel ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file is a simple example of using Acxxel.
+///
+//===----------------------------------------------------------------------===//
+
+/// [Example simple saxpy]
+#include "acxxel.h"
+
+#include <array>
+#include <cstdio>
+#include <cstdlib>
+
+// A standard CUDA kernel.
+__global__ void saxpyKernel(float A, float *X, float *Y, int N) {
+ int I = (blockDim.x * blockIdx.x) + threadIdx.x;
+ if (I < N)
+ X[I] = A * X[I] + Y[I];
+}
+
+// A host library wrapping the CUDA kernel. All Acxxel calls are in here.
+template <size_t N>
+void saxpy(float A, std::array<float, N> &X, const std::array<float, N> &Y) {
+ // Get the CUDA platform and make a CUDA stream.
+ acxxel::Platform *CUDA = acxxel::getCUDAPlatform().getValue();
+ acxxel::Stream Stream = CUDA->createStream().takeValue();
+
+ // Allocate space for device arrays.
+ auto DeviceX = CUDA->mallocD<float>(N).takeValue();
+ auto DeviceY = CUDA->mallocD<float>(N).takeValue();
+
+ // Copy X and Y out to the device.
+ Stream.syncCopyHToD(X, DeviceX).syncCopyHToD(Y, DeviceY);
+
+ // Launch the kernel using triple-chevron notation.
+ saxpyKernel<<<1, N, 0, Stream>>>(A, DeviceX, DeviceY, N);
+
+ // Copy the results back to the host.
+ acxxel::Status Status = Stream.syncCopyDToH(DeviceX, X).takeStatus();
+
+ // Check for any errors.
+ if (Status.isError()) {
+ std::fprintf(stderr, "Error performing acxxel saxpy: %s\n",
+ Status.getMessage().c_str());
+ std::exit(EXIT_FAILURE);
+ }
+}
+/// [Example simple saxpy]
+
+/// [Example CUDA simple saxpy]
+template <size_t N>
+void cudaSaxpy(float A, std::array<float, N> &X, std::array<float, N> &Y) {
+ // This size is needed all over the place, so give it a name.
+ constexpr size_t Size = N * sizeof(float);
+
+ // Allocate space for device arrays.
+ float *DeviceX;
+ float *DeviceY;
+ cudaMalloc(&DeviceX, Size);
+ cudaMalloc(&DeviceY, Size);
+
+ // Copy X and Y out to the device.
+ cudaMemcpy(DeviceX, X.data(), Size, cudaMemcpyHostToDevice);
+ cudaMemcpy(DeviceY, Y.data(), Size, cudaMemcpyHostToDevice);
+
+ // Launch the kernel using triple-chevron notation.
+ saxpyKernel<<<1, N>>>(A, DeviceX, DeviceY, N);
+
+ // Copy the results back to the host.
+ cudaMemcpy(X.data(), DeviceX, Size, cudaMemcpyDeviceToHost);
+
+ // Free resources.
+ cudaFree(DeviceX);
+ cudaFree(DeviceY);
+
+ // Check for any errors.
+ cudaError_t Error = cudaGetLastError();
+ if (Error) {
+ std::fprintf(stderr, "Error performing cudart saxpy: %s\n",
+ cudaGetErrorString(Error));
+ std::exit(EXIT_FAILURE);
+ }
+}
+/// [Example CUDA simple saxpy]
+
+template <typename F> void testSaxpy(F &&SaxpyFunction) {
+ float A = 2.f;
+ std::array<float, 3> X = {{0.f, 1.f, 2.f}};
+ std::array<float, 3> Y = {{3.f, 4.f, 5.f}};
+ std::array<float, 3> Expected = {{3.f, 6.f, 9.f}};
+ SaxpyFunction(A, X, Y);
+ for (int I = 0; I < 3; ++I)
+ if (X[I] != Expected[I]) {
+ std::fprintf(stderr, "Result mismatch at index %d, %f != %f\n", I, X[I],
+ Expected[I]);
+ std::exit(EXIT_FAILURE);
+ }
+}
+
+int main() {
+ testSaxpy(saxpy<3>);
+ testSaxpy(cudaSaxpy<3>);
+}
--- /dev/null
+//===--- opencl_acxxel.cpp - OpenCL implementation of the Acxxel API ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file defines the standard OpenCL implementation of the Acxxel API.
+///
+//===----------------------------------------------------------------------===//
+
+#include "acxxel.h"
+
+#include "CL/cl.h"
+
+#include <mutex>
+#include <sstream>
+#include <utility>
+#include <vector>
+
+namespace acxxel {
+
+namespace {
+
+/// An ID containing the platform ID and the device ID within the platform.
+struct FullDeviceID {
+ cl_platform_id PlatformID;
+ cl_device_id DeviceID;
+
+ FullDeviceID(cl_platform_id PlatformID, cl_device_id DeviceID)
+ : PlatformID(PlatformID), DeviceID(DeviceID) {}
+};
+
+thread_local int ActiveDeviceIndex = 0;
+
+static std::string getOpenCLErrorMessage(cl_int Result) {
+ if (!Result)
+ return "success";
+ std::ostringstream OutStream;
+ OutStream << "OpenCL error: code = " << Result;
+ return OutStream.str();
+}
+
+static Status getOpenCLError(cl_int Result, const std::string &Message) {
+ if (!Result)
+ return Status();
+ std::ostringstream OutStream;
+ OutStream << getOpenCLErrorMessage(Result) << ", message = " << Message;
+ return Status(OutStream.str());
+}
+
+static void logOpenCLWarning(cl_int Result, const std::string &Message) {
+ if (Result) {
+ std::ostringstream OutStream;
+ OutStream << Message << ": " << getOpenCLErrorMessage(Result);
+ logWarning(OutStream.str());
+ }
+}
+
+class OpenCLPlatform : public Platform {
+public:
+ ~OpenCLPlatform() override = default;
+
+ static Expected<OpenCLPlatform> create();
+
+ Expected<int> getDeviceCount() override;
+
+ Status setActiveDeviceForThread(int DeviceIndex) override;
+
+ int getActiveDeviceForThread() override;
+
+ Expected<Stream> createStream() override;
+
+ Expected<Event> createEvent() override;
+
+ Expected<Program> createProgramFromSource(Span<const char> Source) override;
+
+protected:
+ Status streamSync(void *Stream) override;
+
+ Status streamWaitOnEvent(void *Stream, void *Event) override;
+
+ Expected<void *> rawMallocD(ptrdiff_t ByteCount) override;
+ HandleDestructor getDeviceMemoryHandleDestructor() override;
+ void *getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize,
+ size_t ByteOffset) override;
+ void rawDestroyDeviceMemorySpanHandle(void *Handle) override;
+
+ Expected<void *> rawGetDeviceSymbolAddress(const void *Symbol) override;
+ Expected<ptrdiff_t> rawGetDeviceSymbolSize(const void *Symbol) override;
+
+ Status rawCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
+ void *DeviceDst, ptrdiff_t DeviceDstByteOffset,
+ ptrdiff_t ByteCount) override;
+ Status rawCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
+ void *HostDst, ptrdiff_t ByteCount) override;
+ Status rawCopyHToD(const void *HostSrc, void *DeviceDst,
+ ptrdiff_t DeviceDstByteOffset,
+ ptrdiff_t ByteCount) override;
+
+ Status rawMemsetD(void *DeviceDst, ptrdiff_t ByteOffset, ptrdiff_t ByteCount,
+ char ByteValue) override;
+
+ Status rawRegisterHostMem(const void *Memory, ptrdiff_t ByteCount) override;
+ HandleDestructor getUnregisterHostMemoryHandleDestructor() override;
+
+ Expected<void *> rawMallocRegisteredH(ptrdiff_t ByteCount) override;
+ HandleDestructor getFreeHostMemoryHandleDestructor() override;
+
+ Status asyncCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
+ void *DeviceDst, ptrdiff_t DeviceDstByteOffset,
+ ptrdiff_t ByteCount, void *Stream) override;
+ Status asyncCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
+ void *HostDst, ptrdiff_t ByteCount,
+ void *Stream) override;
+ Status asyncCopyHToD(const void *HostSrc, void *DeviceDst,
+ ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount,
+ void *Stream) override;
+
+ Status asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
+ ptrdiff_t ByteCount, char ByteValue,
+ void *Stream) override;
+
+ Status addStreamCallback(Stream &Stream, StreamCallback Callback) override;
+
+ Status enqueueEvent(void *Event, void *Stream) override;
+ bool eventIsDone(void *Event) override;
+ Status eventSync(void *Event) override;
+ Expected<float> getSecondsBetweenEvents(void *StartEvent,
+ void *EndEvent) override;
+
+ Expected<void *> rawCreateKernel(void *Program,
+ const std::string &Name) override;
+ HandleDestructor getKernelHandleDestructor() override;
+
+ Status rawEnqueueKernelLaunch(void *Stream, void *Kernel,
+ KernelLaunchDimensions LaunchDimensions,
+ Span<void *> Arguments,
+ Span<size_t> ArgumentSizes,
+ size_t SharedMemoryBytes) override;
+
+private:
+ OpenCLPlatform(std::vector<FullDeviceID> &&FullDeviceIDs,
+ std::vector<cl_context> &&Contexts,
+ std::vector<cl_command_queue> &&CommandQueues)
+ : FullDeviceIDs(std::move(FullDeviceIDs)), Contexts(std::move(Contexts)),
+ CommandQueues(std::move(CommandQueues)) {}
+
+ std::vector<FullDeviceID> FullDeviceIDs;
+ std::vector<cl_context> Contexts;
+ std::vector<cl_command_queue> CommandQueues;
+};
+
+Expected<OpenCLPlatform> OpenCLPlatform::create() {
+ constexpr cl_uint MaxNumEntries = 100;
+ cl_platform_id Platforms[MaxNumEntries];
+ cl_uint NumPlatforms;
+ if (cl_int Result = clGetPlatformIDs(MaxNumEntries, Platforms, &NumPlatforms))
+ return getOpenCLError(Result, "clGetPlatformIDs");
+
+ std::vector<FullDeviceID> FullDeviceIDs;
+ for (cl_uint PlatformIndex = 0; PlatformIndex < NumPlatforms;
+ ++PlatformIndex) {
+ cl_uint NumDevices;
+ cl_device_id Devices[MaxNumEntries];
+ if (cl_int Result =
+ clGetDeviceIDs(Platforms[PlatformIndex], CL_DEVICE_TYPE_ALL,
+ MaxNumEntries, Devices, &NumDevices))
+ return getOpenCLError(Result, "clGetDeviceIDs");
+ for (cl_uint DeviceIndex = 0; DeviceIndex < NumDevices; ++DeviceIndex)
+ FullDeviceIDs.emplace_back(Platforms[PlatformIndex],
+ Devices[DeviceIndex]);
+ }
+
+ if (FullDeviceIDs.empty())
+ return Status("No OpenCL device available on this system.");
+
+ std::vector<cl_context> Contexts(FullDeviceIDs.size());
+ std::vector<cl_command_queue> CommandQueues(FullDeviceIDs.size());
+ for (size_t I = 0; I < FullDeviceIDs.size(); ++I) {
+ cl_int CreateContextResult;
+ Contexts[I] = clCreateContext(nullptr, 1, &FullDeviceIDs[I].DeviceID,
+ nullptr, nullptr, &CreateContextResult);
+ if (CreateContextResult)
+ return getOpenCLError(CreateContextResult, "clCreateContext");
+
+ cl_int CreateCommandQueueResult;
+ CommandQueues[I] = clCreateCommandQueue(
+ Contexts[I], FullDeviceIDs[I].DeviceID, CL_QUEUE_PROFILING_ENABLE,
+ &CreateCommandQueueResult);
+ if (CreateCommandQueueResult)
+ return getOpenCLError(CreateCommandQueueResult, "clCreateCommandQueue");
+ }
+
+ return OpenCLPlatform(std::move(FullDeviceIDs), std::move(Contexts),
+ std::move(CommandQueues));
+}
+
+Expected<int> OpenCLPlatform::getDeviceCount() { return FullDeviceIDs.size(); }
+
+Status OpenCLPlatform::setActiveDeviceForThread(int DeviceIndex) {
+ if (static_cast<size_t>(DeviceIndex) >= FullDeviceIDs.size())
+ return Status("Could not set active device index to " +
+ std::to_string(DeviceIndex) + " because there are only " +
+ std::to_string(FullDeviceIDs.size()) +
+ " devices in the system");
+ ActiveDeviceIndex = DeviceIndex;
+ return Status();
+}
+
+int OpenCLPlatform::getActiveDeviceForThread() { return ActiveDeviceIndex; }
+
+static void openCLDestroyStream(void *H) {
+ logOpenCLWarning(clReleaseCommandQueue(static_cast<cl_command_queue>(H)),
+ "clReleaseCommandQueue");
+}
+
+Expected<Stream> OpenCLPlatform::createStream() {
+ cl_int Result;
+ cl_command_queue Queue = clCreateCommandQueue(
+ Contexts[ActiveDeviceIndex], FullDeviceIDs[ActiveDeviceIndex].DeviceID,
+ CL_QUEUE_PROFILING_ENABLE, &Result);
+ if (Result)
+ return getOpenCLError(Result, "clCreateCommandQueue");
+ return constructStream(this, Queue, openCLDestroyStream);
+}
+
+static void openCLEventDestroy(void *H) {
+ cl_event *CLEvent = static_cast<cl_event *>(H);
+ logOpenCLWarning(clReleaseEvent(*CLEvent), "clReleaseEvent");
+ delete CLEvent;
+}
+
+Status OpenCLPlatform::streamSync(void *Stream) {
+ return getOpenCLError(clFinish(static_cast<cl_command_queue>(Stream)),
+ "clFinish");
+}
+
+Status OpenCLPlatform::streamWaitOnEvent(void *Stream, void *Event) {
+ cl_event *CLEvent = static_cast<cl_event *>(Event);
+ return getOpenCLError(
+ clEnqueueBarrierWithWaitList(static_cast<cl_command_queue>(Stream), 1,
+ CLEvent, nullptr),
+ "clEnqueueMarkerWithWaitList");
+}
+
+Expected<Event> OpenCLPlatform::createEvent() {
+ cl_int Result;
+ cl_event Event = clCreateUserEvent(Contexts[ActiveDeviceIndex], &Result);
+ if (Result)
+ return getOpenCLError(Result, "clCreateUserEvent");
+ if (cl_int Result = clSetUserEventStatus(Event, CL_COMPLETE))
+ return getOpenCLError(Result, "clSetUserEventStatus");
+ return constructEvent(this, new cl_event(Event), openCLEventDestroy);
+}
+
+static void openCLDestroyProgram(void *H) {
+ logOpenCLWarning(clReleaseProgram(static_cast<cl_program>(H)),
+ "clReleaseProgram");
+}
+
+Expected<Program>
+OpenCLPlatform::createProgramFromSource(Span<const char> Source) {
+ cl_int Error;
+ const char *CSource = Source.data();
+ size_t SourceSize = Source.size();
+ cl_program Program = clCreateProgramWithSource(Contexts[ActiveDeviceIndex], 1,
+ &CSource, &SourceSize, &Error);
+ if (Error)
+ return getOpenCLError(Error, "clCreateProgramWithSource");
+ cl_device_id DeviceID = FullDeviceIDs[ActiveDeviceIndex].DeviceID;
+ if (cl_int Error =
+ clBuildProgram(Program, 1, &DeviceID, nullptr, nullptr, nullptr))
+ return getOpenCLError(Error, "clBuildProgram");
+ return constructProgram(this, Program, openCLDestroyProgram);
+}
+
+Expected<void *> OpenCLPlatform::rawMallocD(ptrdiff_t ByteCount) {
+ cl_int Result;
+ cl_mem Memory = clCreateBuffer(Contexts[ActiveDeviceIndex], CL_MEM_READ_WRITE,
+ ByteCount, nullptr, &Result);
+ if (Result)
+ return getOpenCLError(Result, "clCreateBuffer");
+ return reinterpret_cast<void *>(Memory);
+}
+
+static void openCLDestroyDeviceMemory(void *H) {
+ logOpenCLWarning(clReleaseMemObject(static_cast<cl_mem>(H)),
+ "clReleaseMemObject");
+}
+
+HandleDestructor OpenCLPlatform::getDeviceMemoryHandleDestructor() {
+ return openCLDestroyDeviceMemory;
+}
+
+void *OpenCLPlatform::getDeviceMemorySpanHandle(void *BaseHandle,
+ size_t ByteSize,
+ size_t ByteOffset) {
+ cl_int Error;
+ cl_buffer_region Region;
+ Region.origin = ByteOffset;
+ Region.size = ByteSize;
+ cl_mem SubBuffer =
+ clCreateSubBuffer(static_cast<cl_mem>(BaseHandle), 0,
+ CL_BUFFER_CREATE_TYPE_REGION, &Region, &Error);
+ logOpenCLWarning(Error, "clCreateSubBuffer");
+ if (Error)
+ return nullptr;
+ return SubBuffer;
+}
+
+void OpenCLPlatform::rawDestroyDeviceMemorySpanHandle(void *Handle) {
+ openCLDestroyDeviceMemory(Handle);
+}
+
+Expected<void *>
+OpenCLPlatform::rawGetDeviceSymbolAddress(const void * /*Symbol*/) {
+ // This doesn't seem to have any equivalent in OpenCL.
+ return Status("not implemented");
+}
+
+Expected<ptrdiff_t>
+OpenCLPlatform::rawGetDeviceSymbolSize(const void * /*Symbol*/) {
+ // This doesn't seem to have any equivalent in OpenCL.
+ return Status("not implemented");
+}
+
+Status OpenCLPlatform::rawCopyDToD(const void *DeviceSrc,
+ ptrdiff_t DeviceSrcByteOffset,
+ void *DeviceDst,
+ ptrdiff_t DeviceDstByteOffset,
+ ptrdiff_t ByteCount) {
+ cl_event DoneEvent;
+ if (cl_int Result = clEnqueueCopyBuffer(
+ CommandQueues[ActiveDeviceIndex],
+ static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
+ static_cast<cl_mem>(DeviceDst), DeviceSrcByteOffset,
+ DeviceDstByteOffset, ByteCount, 0, nullptr, &DoneEvent))
+ return getOpenCLError(Result, "clEnqueueCopyBuffer");
+ return getOpenCLError(clWaitForEvents(1, &DoneEvent), "clWaitForEvents");
+}
+
+Status OpenCLPlatform::rawCopyDToH(const void *DeviceSrc,
+ ptrdiff_t DeviceSrcByteOffset, void *HostDst,
+ ptrdiff_t ByteCount) {
+ cl_event DoneEvent;
+ if (cl_int Result = clEnqueueReadBuffer(
+ CommandQueues[ActiveDeviceIndex],
+ static_cast<cl_mem>(const_cast<void *>(DeviceSrc)), CL_TRUE,
+ DeviceSrcByteOffset, ByteCount, HostDst, 0, nullptr, &DoneEvent))
+ return getOpenCLError(Result, "clEnqueueReadBuffer");
+ return getOpenCLError(clWaitForEvents(1, &DoneEvent), "clWaitForEvents");
+}
+
+Status OpenCLPlatform::rawCopyHToD(const void *HostSrc, void *DeviceDst,
+ ptrdiff_t DeviceDstByteOffset,
+ ptrdiff_t ByteCount) {
+ cl_event DoneEvent;
+ if (cl_int Result = clEnqueueWriteBuffer(
+ CommandQueues[ActiveDeviceIndex], static_cast<cl_mem>(DeviceDst),
+ CL_TRUE, DeviceDstByteOffset, ByteCount, HostSrc, 0, nullptr,
+ &DoneEvent))
+ return getOpenCLError(Result, "clEnqueueWriteBuffer");
+ return getOpenCLError(clWaitForEvents(1, &DoneEvent), "clWaitForEvents");
+}
+
+Status OpenCLPlatform::rawMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
+ ptrdiff_t ByteCount, char ByteValue) {
+ cl_event DoneEvent;
+ if (cl_int Result = clEnqueueFillBuffer(
+ CommandQueues[ActiveDeviceIndex], static_cast<cl_mem>(DeviceDst),
+ &ByteValue, 1, ByteOffset, ByteCount, 0, nullptr, &DoneEvent))
+ return getOpenCLError(Result, "clEnqueueFillBuffer");
+ return getOpenCLError(clWaitForEvents(1, &DoneEvent), "clWaitForEvents");
+}
+
+static void noOpHandleDestructor(void *) {}
+
+Status OpenCLPlatform::rawRegisterHostMem(const void * /*Memory*/,
+ ptrdiff_t /*ByteCount*/) {
+ // TODO(jhen): Do we want to do something to pin the memory here?
+ return Status();
+}
+
+HandleDestructor OpenCLPlatform::getUnregisterHostMemoryHandleDestructor() {
+ // TODO(jhen): Do we want to unpin the memory here?
+ return noOpHandleDestructor;
+}
+
+Expected<void *> OpenCLPlatform::rawMallocRegisteredH(ptrdiff_t ByteCount) {
+ // TODO(jhen): Do we want to do something to pin the memory here?
+ return std::malloc(ByteCount);
+}
+
+static void freeMemoryHandleDestructor(void *Memory) {
+ // TODO(jhen): Do we want to unpin the memory here?
+ std::free(Memory);
+}
+
+HandleDestructor OpenCLPlatform::getFreeHostMemoryHandleDestructor() {
+ return freeMemoryHandleDestructor;
+}
+
+Status OpenCLPlatform::asyncCopyDToD(const void *DeviceSrc,
+ ptrdiff_t DeviceSrcByteOffset,
+ void *DeviceDst,
+ ptrdiff_t DeviceDstByteOffset,
+ ptrdiff_t ByteCount, void *Stream) {
+ return getOpenCLError(
+ clEnqueueCopyBuffer(static_cast<cl_command_queue>(Stream),
+ static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
+ static_cast<cl_mem>(DeviceDst), DeviceSrcByteOffset,
+ DeviceDstByteOffset, ByteCount, 0, nullptr, nullptr),
+ "clEnqueueCopyBuffer");
+}
+
+Status OpenCLPlatform::asyncCopyDToH(const void *DeviceSrc,
+ ptrdiff_t DeviceSrcByteOffset,
+ void *HostDst, ptrdiff_t ByteCount,
+ void *Stream) {
+ return getOpenCLError(
+ clEnqueueReadBuffer(static_cast<cl_command_queue>(Stream),
+ static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
+ CL_TRUE, DeviceSrcByteOffset, ByteCount, HostDst, 0,
+ nullptr, nullptr),
+ "clEnqueueReadBuffer");
+}
+
+Status OpenCLPlatform::asyncCopyHToD(const void *HostSrc, void *DeviceDst,
+ ptrdiff_t DeviceDstByteOffset,
+ ptrdiff_t ByteCount, void *Stream) {
+ return getOpenCLError(
+ clEnqueueWriteBuffer(static_cast<cl_command_queue>(Stream),
+ static_cast<cl_mem>(DeviceDst), CL_TRUE,
+ DeviceDstByteOffset, ByteCount, HostSrc, 0, nullptr,
+ nullptr),
+ "clEnqueueWriteBuffer");
+}
+
+Status OpenCLPlatform::asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
+ ptrdiff_t ByteCount, char ByteValue,
+ void *Stream) {
+ return getOpenCLError(
+ clEnqueueFillBuffer(static_cast<cl_command_queue>(Stream),
+ static_cast<cl_mem>(DeviceDst), &ByteValue, 1,
+ ByteOffset, ByteCount, 0, nullptr, nullptr),
+ "clEnqueueFillBuffer");
+}
+
+struct StreamCallbackUserData {
+ StreamCallbackUserData(Stream &TheStream, StreamCallback Function,
+ cl_event EndEvent)
+ : TheStream(TheStream), TheFunction(std::move(Function)),
+ EndEvent(EndEvent) {}
+
+ Stream &TheStream;
+ StreamCallback TheFunction;
+ cl_event EndEvent;
+};
+
+// A function with the right signature to pass to clSetEventCallback.
+void CL_CALLBACK openCLStreamCallbackShim(cl_event,
+ cl_int EventCommandExecStatus,
+ void *UserData) {
+ std::unique_ptr<StreamCallbackUserData> Data(
+ static_cast<StreamCallbackUserData *>(UserData));
+ Data->TheFunction(
+ Data->TheStream,
+ getOpenCLError(EventCommandExecStatus, "stream callback error state"));
+ if (cl_int Result = clSetUserEventStatus(Data->EndEvent, CL_COMPLETE))
+ logOpenCLWarning(Result, "clSetUserEventStatus");
+ if (cl_int Result = clReleaseEvent(Data->EndEvent))
+ logOpenCLWarning(Result, "clReleaseEvent");
+}
+
+Status OpenCLPlatform::addStreamCallback(Stream &TheStream,
+ StreamCallback Callback) {
+ cl_int Result;
+ cl_event StartEvent = clCreateUserEvent(Contexts[ActiveDeviceIndex], &Result);
+ if (Result)
+ return getOpenCLError(Result, "clCreateUserEvent");
+ cl_event EndEvent = clCreateUserEvent(Contexts[ActiveDeviceIndex], &Result);
+ if (Result)
+ return getOpenCLError(Result, "clCreateUserEvent");
+ cl_event StartBarrierEvent;
+ if (cl_int Result = clEnqueueBarrierWithWaitList(
+ static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1,
+ &StartEvent, &StartBarrierEvent))
+ return getOpenCLError(Result, "clEnqueueBarrierWithWaitList");
+
+ if (cl_int Result = clEnqueueBarrierWithWaitList(
+ static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1,
+ &EndEvent, nullptr))
+ return getOpenCLError(Result, "clEnqueueBarrierWithWaitList");
+
+ std::unique_ptr<StreamCallbackUserData> UserData(
+ new StreamCallbackUserData(TheStream, std::move(Callback), EndEvent));
+ if (cl_int Result =
+ clSetEventCallback(StartBarrierEvent, CL_RUNNING,
+ openCLStreamCallbackShim, UserData.release()))
+ return getOpenCLError(Result, "clSetEventCallback");
+
+ if (cl_int Result = clSetUserEventStatus(StartEvent, CL_COMPLETE))
+ return getOpenCLError(Result, "clSetUserEventStatus");
+
+ if (cl_int Result = clReleaseEvent(StartBarrierEvent))
+ return getOpenCLError(Result, "clReleaseEvent");
+
+ return getOpenCLError(clReleaseEvent(StartEvent), "clReleaseEvent");
+}
+
+Status OpenCLPlatform::enqueueEvent(void *Event, void *Stream) {
+ cl_event *CLEvent = static_cast<cl_event *>(Event);
+ cl_event OldEvent = *CLEvent;
+ cl_event NewEvent;
+ if (cl_int Result = clEnqueueMarkerWithWaitList(
+ static_cast<cl_command_queue>(Stream), 0, nullptr, &NewEvent))
+ return getOpenCLError(Result, "clEnqueueMarkerWithWaitList");
+ *CLEvent = NewEvent;
+ return getOpenCLError(clReleaseEvent(OldEvent), "clReleaseEvent");
+}
+
+bool OpenCLPlatform::eventIsDone(void *Event) {
+ cl_event *CLEvent = static_cast<cl_event *>(Event);
+ cl_int EventStatus;
+ logOpenCLWarning(clGetEventInfo(*CLEvent, CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(EventStatus), &EventStatus, nullptr),
+ "clGetEventInfo");
+ return EventStatus == CL_COMPLETE || EventStatus < 0;
+}
+
+Status OpenCLPlatform::eventSync(void *Event) {
+ cl_event *CLEvent = static_cast<cl_event *>(Event);
+ return getOpenCLError(clWaitForEvents(1, CLEvent), "clWaitForEvents");
+}
+
+Expected<float> OpenCLPlatform::getSecondsBetweenEvents(void *StartEvent,
+ void *EndEvent) {
+ cl_event *CLStartEvent = static_cast<cl_event *>(StartEvent);
+ cl_event *CLEndEvent = static_cast<cl_event *>(EndEvent);
+
+ cl_profiling_info ParamName = CL_PROFILING_COMMAND_END;
+ cl_ulong StartNanoseconds;
+ cl_ulong EndNanoseconds;
+ if (cl_int Result =
+ clGetEventProfilingInfo(*CLStartEvent, ParamName, sizeof(cl_ulong),
+ &StartNanoseconds, nullptr))
+ return getOpenCLError(Result, "clGetEventProfilingInfo");
+ if (cl_int Result = clGetEventProfilingInfo(
+ *CLEndEvent, ParamName, sizeof(cl_ulong), &EndNanoseconds, nullptr))
+ return getOpenCLError(Result, "clGetEventProfilingInfo");
+ return (EndNanoseconds - StartNanoseconds) * 1e-12;
+}
+
+Expected<void *> OpenCLPlatform::rawCreateKernel(void *Program,
+ const std::string &Name) {
+
+ cl_int Error;
+ cl_kernel Kernel =
+ clCreateKernel(static_cast<cl_program>(Program), Name.c_str(), &Error);
+ if (Error)
+ return getOpenCLError(Error, "clCreateKernel");
+ return Kernel;
+}
+
+static void openCLDestroyKernel(void *H) {
+ logOpenCLWarning(clReleaseKernel(static_cast<cl_kernel>(H)),
+ "clReleaseKernel");
+}
+
+HandleDestructor OpenCLPlatform::getKernelHandleDestructor() {
+ return openCLDestroyKernel;
+}
+
+Status OpenCLPlatform::rawEnqueueKernelLaunch(
+ void *Stream, void *Kernel, KernelLaunchDimensions LaunchDimensions,
+ Span<void *> Arguments, Span<size_t> ArgumentSizes,
+ size_t SharedMemoryBytes) {
+ if (SharedMemoryBytes != 0)
+ return Status("OpenCL kernel launches only accept zero for the shared "
+ "memory byte size");
+ cl_kernel TheKernel = static_cast<cl_kernel>(Kernel);
+ for (int I = 0; I < Arguments.size(); ++I)
+ if (cl_int Error =
+ clSetKernelArg(TheKernel, I, ArgumentSizes[I], Arguments[I]))
+ return getOpenCLError(Error, "clSetKernelArg");
+ size_t LocalWorkSize[] = {LaunchDimensions.BlockX, LaunchDimensions.BlockY,
+ LaunchDimensions.BlockZ};
+ size_t GlobalWorkSize[] = {LaunchDimensions.BlockX * LaunchDimensions.GridX,
+ LaunchDimensions.BlockY * LaunchDimensions.GridY,
+ LaunchDimensions.BlockZ * LaunchDimensions.GridZ};
+ return getOpenCLError(
+ clEnqueueNDRangeKernel(static_cast<cl_command_queue>(Stream), TheKernel,
+ 3, nullptr, GlobalWorkSize, LocalWorkSize, 0,
+ nullptr, nullptr),
+ "clEnqueueNDRangeKernel");
+}
+
+} // namespace
+
+namespace opencl {
+
+/// Gets an OpenCLPlatform instance and returns it as an unowned pointer to a
+/// Platform.
+Expected<Platform *> getPlatform() {
+ static auto MaybePlatform = []() -> Expected<OpenCLPlatform *> {
+ Expected<OpenCLPlatform> CreationResult = OpenCLPlatform::create();
+ if (CreationResult.isError())
+ return CreationResult.getError();
+ else
+ return new OpenCLPlatform(CreationResult.takeValue());
+ }();
+ return MaybePlatform;
+}
+
+} // namespace opencl
+
+} // namespace acxxel
--- /dev/null
+//===--- span- The span class -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ACXXEL_SPAN_H
+#define ACXXEL_SPAN_H
+
+#include <array>
+#include <cstddef>
+#include <exception>
+#include <iterator>
+#include <type_traits>
+
+namespace acxxel {
+
+/// Value used to indicate slicing to the end of the span.
+static constexpr std::ptrdiff_t dynamic_extent = -1; // NOLINT
+
+class SpanBase {};
+
+/// Implementation of the proposed C++17 std::span class.
+///
+/// Based on the paper:
+/// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0122r1.pdf
+template <typename ElementType> class Span : public SpanBase {
+public:
+ /// \name constants and types
+ /// \{
+
+ using element_type = ElementType;
+ using index_type = std::ptrdiff_t;
+ using pointer = element_type *;
+ using reference = element_type &;
+ using iterator = element_type *;
+ using const_iterator = const element_type *;
+ using value_type = typename std::remove_const<element_type>::type;
+
+ /// \}
+
+ /// \name constructors, copy, assignment, and destructor.
+ /// \{
+
+ /// Constructs an empty span with null pointer data.
+ Span() : Data(nullptr), Size(0) {}
+
+ /// Constructs an empty span with null pointer data.
+ // Intentionally implicit.
+ Span(std::nullptr_t) : Data(nullptr), Size(0) {}
+
+ /// Constructs a span from a pointer and element count.
+ Span(pointer Ptr, index_type Count) : Data(Ptr), Size(Count) {
+ if (Count < 0 || (!Ptr && Count))
+ std::terminate();
+ }
+
+ /// Constructs a span from a pointer to the fist element in the range and a
+ /// pointer to one past the last element in the range.
+ Span(pointer FirstElem, pointer LastElem)
+ : Data(FirstElem), Size(std::distance(FirstElem, LastElem)) {
+ if (Size < 0)
+ std::terminate();
+ }
+
+ /// Constructs a span from an array.
+ // Intentionally implicit.
+ template <typename T, size_t N> Span(T (&Arr)[N]) : Data(Arr), Size(N) {}
+
+ /// Constructs a span from a std::array.
+ // Intentionally implicit.
+ template <size_t N>
+ Span(const std::array<typename std::remove_const<element_type>::type, N> &Arr)
+ : Data(Arr.data()), Size(N) {}
+
+ /// Constructs a span from a container such as a std::vector.
+ // TODO(jhen): Put in a check to make sure this constructor does not
+ // participate in overload resolution unless Container meets the following
+ // requirements:
+ // * Container is a contiguous container and a sequence container.
+ // Intentionally implicit.
+ template <typename Container>
+ Span(Container &Cont,
+ typename std::enable_if<
+ std::is_same<
+ typename std::remove_const<typename Container::value_type>::type,
+ typename std::remove_const<element_type>::type>::value &&
+ !std::is_array<Container>::value &&
+ !std::is_base_of<SpanBase, Container>::value &&
+ std::is_convertible<decltype(&Cont[0]), pointer>::value>::type * =
+ nullptr)
+ : Data(Cont.data()), Size(Cont.size()) {}
+
+ /// Avoids creating spans from expiring temporary objects.
+ // TODO(jhen): Put in a check to make sure this constructor does not
+ // participate in overload resolution unless Container meets the following
+ // requirements:
+ // * Container is a contiguous container and a sequence container.
+ template <typename Container>
+ Span(Container &&Cont,
+ typename std::enable_if<
+ std::is_same<
+ typename std::remove_const<typename Container::value_type>::type,
+ typename std::remove_const<element_type>::type>::value &&
+ !std::is_array<Container>::value &&
+ !std::is_base_of<SpanBase, Container>::value &&
+ std::is_convertible<decltype(&Cont[0]), pointer>::value>::type * =
+ nullptr) = delete;
+
+ Span(const Span &) noexcept = default;
+ Span(Span &&) noexcept;
+
+ /// Constructs a span from copying a span of another type that can be
+ /// implicitly converted to the type stored by the constructed span.
+ // Intentionally implicit.
+ template <typename OtherElementType>
+ Span(const Span<OtherElementType> &Other)
+ : Data(Other.Data), Size(Other.Size) {}
+
+ /// Constructs a span from moving a span of another type that can be
+ /// implicitly converted to the type stored by the constructed span.
+ // Intentionally implicit.
+ template <typename OtherElementType>
+ Span(Span<OtherElementType> &&Other) : Data(Other.Data), Size(Other.Size) {}
+
+ ~Span() = default;
+
+ Span &operator=(const Span &) noexcept = default;
+ Span &operator=(Span &&) noexcept;
+
+ /// \}
+
+ /// \name subviews
+ /// \{
+
+ /// Creates a span out of the first Count elements of this span.
+ Span<element_type> first(index_type Count) const {
+ bool Valid = Count >= 0 && Count <= size();
+ if (!Valid)
+ std::terminate();
+ return Span<element_type>(data(), Count);
+ }
+
+ /// Creates a span out of the last Count elements of this span.
+ Span<element_type> last(index_type Count) const {
+ bool Valid = Count >= 0 && Count <= size();
+ if (!Valid)
+ std::terminate();
+ return Span<element_type>(Count == 0 ? data() : data() + (size() - Count),
+ Count);
+ }
+
+ /// Creates a span out of the Count elements of this span beginning at Offset.
+ ///
+ /// If no arguments is provided for Count, the new span will extend to the end
+ /// of the current span.
+ Span<element_type> subspan(index_type Offset,
+ index_type Count = dynamic_extent) const {
+ bool Valid =
+ (Offset == 0 || (Offset > 0 && Offset <= size())) &&
+ (Count == dynamic_extent || (Count >= 0 && Offset + Count <= size()));
+ if (!Valid)
+ std::terminate();
+ return Span<element_type>(
+ data() + Offset, Count == dynamic_extent ? size() - Offset : Count);
+ }
+
+ /// \}
+
+ /// \name observers
+ /// \{
+
+ index_type length() const { return Size; }
+ index_type size() const { return Size; }
+ bool empty() const { return size() == 0; }
+
+ /// \}
+
+ /// \name element access
+ /// \{
+
+ reference operator[](index_type Idx) const {
+ bool Valid = Idx >= 0 && Idx < size();
+ if (!Valid)
+ std::terminate();
+ return Data[Idx];
+ }
+
+ reference operator()(index_type Idx) const { return operator[](Idx); }
+
+ pointer data() const noexcept { return Data; }
+
+ /// \}
+
+ /// \name iterator support
+ /// \{
+
+ iterator begin() const noexcept { return Data; }
+ iterator end() const noexcept { return Data + Size; }
+ const_iterator cbegin() const noexcept { return Data; }
+ const_iterator cend() const noexcept { return Data + Size; }
+
+ /// \}
+
+private:
+ template <typename OtherElementType> friend class Span;
+
+ pointer Data;
+ index_type Size;
+};
+
+template <typename ElementType>
+Span<ElementType>::Span(Span &&) noexcept = default;
+template <typename ElementType>
+Span<ElementType> &Span<ElementType>::operator=(Span &&) noexcept = default;
+
+} // namespace acxxel
+
+#endif // ACXXEL_SPAN_H
--- /dev/null
+//===--- status.h - Status and Expected classes -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ACXXEL_STATUS_H
+#define ACXXEL_STATUS_H
+
+#include <cassert>
+#include <string>
+
+// The clang compiler supports annotating class declarations with the
+// warn_unused_result attribute, and this has the meaning that whenever that
+// type is returned from a function, the function is marked as
+// warn_unused_result.
+//
+// The gcc compiler does not support warn_unused_result for classes, only for
+// functions, so we only use this feature with clang.
+#ifdef __clang__
+#define ACXXEL_WARN_UNUSED_RESULT_TYPE __attribute__((warn_unused_result))
+#else
+#define ACXXEL_WARN_UNUSED_RESULT_TYPE
+#endif
+
+namespace acxxel {
+
+/// Status type.
+///
+/// May represent failure with a string error message, or may indicate success.
+class ACXXEL_WARN_UNUSED_RESULT_TYPE Status {
+public:
+ /// Creates a Status representing success.
+ Status() : HasMessage(false) {}
+
+ /// Creates a Status representing failure with the given error message.
+ explicit Status(const std::string &Message)
+ : HasMessage(true), Message(Message) {}
+
+ Status(const Status &) = default;
+
+ Status &operator=(const Status &) = default;
+
+ Status(Status &&) noexcept = default;
+
+ // Cannot use default because the move assignment operator for std::string is
+ // not marked noexcept.
+ Status &operator=(Status &&That) noexcept {
+ HasMessage = That.HasMessage;
+ Message = std::move(That.Message);
+ return *this;
+ }
+
+ ~Status() = default;
+
+ /// Returns true if this Status represents failure. Otherwise, returns false.
+ bool isError() const { return HasMessage; }
+
+ /// Returns true if this Status represents success. Otherwise, returns false.
+ operator bool() const { return !HasMessage; }
+
+ /// Gets a reference to the error message for this Status.
+ ///
+ /// Should only be called if isError() returns true.
+ const std::string &getMessage() const { return Message; }
+
+private:
+ bool HasMessage;
+ std::string Message;
+};
+
+class ExpectedBase {
+protected:
+ enum class State {
+ SUCCESS,
+ FAILURE,
+ MOVED,
+ };
+};
+
+/// Either a value of type T or a Status representing failure.
+template <typename T> class Expected : public ExpectedBase {
+public:
+ /// Creates an Expected representing failure with the given Error status.
+ // Intentionally implicit.
+ Expected(Status AnError)
+ : TheState(State::FAILURE), TheError(std::move(AnError)) {
+ assert(AnError.isError() && "constructing an error Expected value from a "
+ "success status is not allowed");
+ }
+
+ /// Creates an Expected representing success with the given value.
+ // Intentionally implicit.
+ Expected(T Value) : TheState(State::SUCCESS), TheValue(std::move(Value)) {}
+
+ Expected(const Expected &That) : TheState(That.TheState) {
+ switch (TheState) {
+ case State::SUCCESS:
+ new (&TheValue) T(That.TheValue);
+ break;
+ case State::FAILURE:
+ new (&TheError) Status(That.TheError);
+ break;
+ case State::MOVED:
+ // Nothing to do in this case.
+ break;
+ }
+ }
+
+ Expected &operator=(Expected That) {
+ TheState = That.TheState;
+ switch (TheState) {
+ case State::SUCCESS:
+ new (&TheValue) T(std::move(That.TheValue));
+ break;
+ case State::FAILURE:
+ new (&TheError) Status(std::move(That.TheError));
+ break;
+ case State::MOVED:
+ // Nothing to do in this case.
+ break;
+ }
+ return *this;
+ }
+
+ Expected(Expected &&That) noexcept : TheState(That.TheState) {
+ switch (TheState) {
+ case State::SUCCESS:
+ new (&TheValue) T(std::move(That.TheValue));
+ break;
+ case State::FAILURE:
+ new (&TheError) Status(std::move(That.TheError));
+ break;
+ case State::MOVED:
+ // Nothing to do in this case.
+ break;
+ }
+ That.TheState = State::MOVED;
+ }
+
+ template <typename U>
+ Expected(const Expected<U> &That) : TheState(That.TheState) {
+ switch (TheState) {
+ case State::SUCCESS:
+ new (&TheValue) T(That.TheValue);
+ break;
+ case State::FAILURE:
+ new (&TheError) Status(That.TheError);
+ break;
+ case State::MOVED:
+ // Nothing to do in this case.
+ break;
+ }
+ }
+
+ template <typename U> Expected(Expected<U> &&That) : TheState(That.TheState) {
+ switch (TheState) {
+ case State::SUCCESS:
+ new (&TheValue) T(std::move(That.TheValue));
+ break;
+ case State::FAILURE:
+ new (&TheError) Status(std::move(That.TheError));
+ break;
+ case State::MOVED:
+ // Nothing to do in this case.
+ break;
+ }
+ }
+
+ ~Expected() {
+ switch (TheState) {
+ case State::SUCCESS:
+ TheValue.~T();
+ break;
+ case State::FAILURE:
+ TheError.~Status();
+ break;
+ case State::MOVED:
+ // Nothing to do for this case.
+ break;
+ }
+ }
+
+ /// Returns true if this instance represents failure.
+ bool isError() const { return TheState != State::SUCCESS; }
+
+ /// Gets a reference to the Status object.
+ ///
+ /// Should only be called if isError() returns true.
+ const Status &getError() const {
+ assert(isError());
+ return TheError;
+ }
+
+ /// Gets a const reference to the value object.
+ ///
+ /// Should only be called if isError() returns false.
+ const T &getValue() const {
+ assert(!isError());
+ return TheValue;
+ }
+
+ /// Gets a reference to the value object.
+ ///
+ /// Should only be called if isError() returns false.
+ T &getValue() {
+ assert(!isError());
+ return TheValue;
+ }
+
+ /// Takes the value from this object by moving it to the return value.
+ ///
+ /// Should only be called if isError() returns false.
+ T takeValue() {
+ assert(!isError());
+ TheState = State::MOVED;
+ return std::move(TheValue);
+ }
+
+private:
+ template <typename U> friend class Expected;
+
+ State TheState;
+
+ union {
+ T TheValue;
+ Status TheError;
+ };
+};
+
+} // namespace acxxel
+
+#endif // ACXXEL_STATUS_H
--- /dev/null
+add_executable(acxxel_test acxxel_test.cpp)
+target_link_libraries(
+ acxxel_test
+ acxxel
+ ${GTEST_BOTH_LIBRARIES}
+ ${CMAKE_THREAD_LIBS_INIT})
+add_test(AcxxelTest acxxel_test)
+
+add_executable(span_test span_test.cpp)
+target_link_libraries(
+ span_test
+ ${GTEST_BOTH_LIBRARIES}
+ ${CMAKE_THREAD_LIBS_INIT})
+add_test(SpanTest span_test)
+
+add_executable(status_test status_test.cpp)
+target_link_libraries(
+ status_test
+ ${GTEST_BOTH_LIBRARIES}
+ ${CMAKE_THREAD_LIBS_INIT})
+add_test(StatusTest status_test)
+
+if(ACXXEL_ENABLE_OPENCL)
+add_executable(opencl_test opencl_test.cpp)
+target_link_libraries(
+ opencl_test
+ acxxel
+ ${GTEST_BOTH_LIBRARIES}
+ ${CMAKE_THREAD_LIBS_INIT})
+add_test(OpenCLTest opencl_test)
+endif()
--- /dev/null
+//===--- acxxel_test.cpp - Tests for the Acxxel API -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "acxxel.h"
+#include "config.h"
+#include "gtest/gtest.h"
+
+#include <chrono>
+#include <condition_variable>
+#include <mutex>
+#include <thread>
+
+namespace {
+
+template <typename T, size_t N> constexpr size_t size(T (&)[N]) { return N; }
+
+using PlatformGetter = acxxel::Expected<acxxel::Platform *> (*)();
+class AcxxelTest : public ::testing::TestWithParam<PlatformGetter> {};
+
+TEST_P(AcxxelTest, GetDeviceCount) {
+ acxxel::Platform *Platform = GetParam()().takeValue();
+ int DeviceCount = Platform->getDeviceCount().getValue();
+ EXPECT_GE(DeviceCount, 0);
+}
+
+// Tests all the methods of a DeviceMemorySpan that was created from the asSpan
+// method of a DeviceMemory object.
+//
+// The length is the number of elements in the span. The ElementByteSize is the
+// number of bytes per element in the span.
+//
+// It is assumed that the input span has 10 or more elements.
+template <typename SpanType>
+void testFullDeviceMemorySpan(SpanType &&Span, ptrdiff_t Length,
+ ptrdiff_t ElementByteSize) {
+ EXPECT_GE(Length, 10);
+ EXPECT_GT(ElementByteSize, 0);
+
+ // Full span
+ EXPECT_EQ(Length, Span.length());
+ EXPECT_EQ(Length, Span.size());
+ EXPECT_EQ(Length * ElementByteSize, Span.byte_size());
+ EXPECT_EQ(0, Span.offset());
+ EXPECT_EQ(0, Span.byte_offset());
+ EXPECT_FALSE(Span.empty());
+
+ // Sub-span with first method.
+ auto First2 = Span.first(2);
+ EXPECT_EQ(2, First2.length());
+ EXPECT_EQ(2, First2.size());
+ EXPECT_EQ(2 * ElementByteSize, First2.byte_size());
+ EXPECT_EQ(0, First2.offset());
+ EXPECT_EQ(0, First2.byte_offset());
+ EXPECT_FALSE(First2.empty());
+
+ auto First0 = Span.first(0);
+ EXPECT_EQ(0, First0.length());
+ EXPECT_EQ(0, First0.size());
+ EXPECT_EQ(0, First0.byte_size());
+ EXPECT_EQ(0, First0.offset());
+ EXPECT_EQ(0, First0.byte_offset());
+ EXPECT_TRUE(First0.empty());
+
+ // Sub-span with last method.
+ auto Last2 = Span.last(2);
+ EXPECT_EQ(2, Last2.length());
+ EXPECT_EQ(2, Last2.size());
+ EXPECT_EQ(2 * ElementByteSize, Last2.byte_size());
+ EXPECT_EQ(Length - 2, Last2.offset());
+ EXPECT_EQ((Length - 2) * ElementByteSize, Last2.byte_offset());
+ EXPECT_FALSE(Last2.empty());
+
+ auto Last0 = Span.last(0);
+ EXPECT_EQ(0, Last0.length());
+ EXPECT_EQ(0, Last0.size());
+ EXPECT_EQ(0, Last0.byte_size());
+ EXPECT_EQ(Length, Last0.offset());
+ EXPECT_EQ(Length * ElementByteSize, Last0.byte_offset());
+ EXPECT_TRUE(Last0.empty());
+
+ // Sub-span with subspan method.
+ auto Middle2 = Span.subspan(4, 2);
+ EXPECT_EQ(2, Middle2.length());
+ EXPECT_EQ(2, Middle2.size());
+ EXPECT_EQ(2 * ElementByteSize, Middle2.byte_size());
+ EXPECT_EQ(4, Middle2.offset());
+ EXPECT_EQ(4 * ElementByteSize, Middle2.byte_offset());
+ EXPECT_FALSE(Middle2.empty());
+
+ auto Middle0 = Span.subspan(4, 0);
+ EXPECT_EQ(0, Middle0.length());
+ EXPECT_EQ(0, Middle0.size());
+ EXPECT_EQ(0, Middle0.byte_size());
+ EXPECT_EQ(4, Middle0.offset());
+ EXPECT_EQ(4 * ElementByteSize, Middle0.byte_offset());
+ EXPECT_TRUE(Middle0.empty());
+
+ auto Subspan2AtStart = Span.subspan(0, 2);
+ EXPECT_EQ(2, Subspan2AtStart.length());
+ EXPECT_EQ(2, Subspan2AtStart.size());
+ EXPECT_EQ(2 * ElementByteSize, Subspan2AtStart.byte_size());
+ EXPECT_EQ(0, Subspan2AtStart.offset());
+ EXPECT_EQ(0, Subspan2AtStart.byte_offset());
+ EXPECT_FALSE(Subspan2AtStart.empty());
+
+ auto Subspan2AtEnd = Span.subspan(Length - 2, 2);
+ EXPECT_EQ(2, Subspan2AtEnd.length());
+ EXPECT_EQ(2, Subspan2AtEnd.size());
+ EXPECT_EQ(2 * ElementByteSize, Subspan2AtEnd.byte_size());
+ EXPECT_EQ(Length - 2, Subspan2AtEnd.offset());
+ EXPECT_EQ((Length - 2) * ElementByteSize, Subspan2AtEnd.byte_offset());
+ EXPECT_FALSE(Subspan2AtEnd.empty());
+
+ auto Subspan0AtStart = Span.subspan(0, 0);
+ EXPECT_EQ(0, Subspan0AtStart.length());
+ EXPECT_EQ(0, Subspan0AtStart.size());
+ EXPECT_EQ(0, Subspan0AtStart.byte_size());
+ EXPECT_EQ(0, Subspan0AtStart.offset());
+ EXPECT_EQ(0, Subspan0AtStart.byte_offset());
+ EXPECT_TRUE(Subspan0AtStart.empty());
+
+ auto Subspan0AtEnd = Span.subspan(Length, 0);
+ EXPECT_EQ(0, Subspan0AtEnd.length());
+ EXPECT_EQ(0, Subspan0AtEnd.size());
+ EXPECT_EQ(0, Subspan0AtEnd.byte_size());
+ EXPECT_EQ(Length, Subspan0AtEnd.offset());
+ EXPECT_EQ(Length * ElementByteSize, Subspan0AtEnd.byte_offset());
+ EXPECT_TRUE(Subspan0AtEnd.empty());
+}
+
+TEST_P(AcxxelTest, DeviceMemory) {
+ acxxel::Platform *Platform = GetParam()().takeValue();
+ acxxel::Expected<acxxel::DeviceMemory<int>> MaybeMemory =
+ Platform->mallocD<int>(10);
+ EXPECT_FALSE(MaybeMemory.isError());
+
+ // ref
+ acxxel::DeviceMemory<int> &MemoryRef = MaybeMemory.getValue();
+ EXPECT_EQ(10, MemoryRef.length());
+ EXPECT_EQ(10, MemoryRef.size());
+ EXPECT_EQ(10 * sizeof(int), static_cast<size_t>(MemoryRef.byte_size()));
+ EXPECT_FALSE(MemoryRef.empty());
+
+ // mutable span
+ acxxel::DeviceMemorySpan<int> MutableSpan = MemoryRef.asSpan();
+ testFullDeviceMemorySpan(MutableSpan, 10, sizeof(int));
+
+ // const ref
+ const acxxel::DeviceMemory<int> &ConstMemoryRef = MaybeMemory.getValue();
+ EXPECT_EQ(10, ConstMemoryRef.length());
+ EXPECT_EQ(10, ConstMemoryRef.size());
+ EXPECT_EQ(10 * sizeof(int), static_cast<size_t>(ConstMemoryRef.byte_size()));
+ EXPECT_FALSE(ConstMemoryRef.empty());
+
+ // immutable span
+ acxxel::DeviceMemorySpan<const int> ImmutableSpan = ConstMemoryRef.asSpan();
+ testFullDeviceMemorySpan(ImmutableSpan, 10, sizeof(int));
+}
+
+TEST_P(AcxxelTest, CopyHostAndDevice) {
+ acxxel::Platform *Platform = GetParam()().takeValue();
+ acxxel::Stream Stream = Platform->createStream().takeValue();
+ int A[] = {0, 1, 2};
+ std::array<int, size(A)> B;
+ acxxel::DeviceMemory<int> X = Platform->mallocD<int>(size(A)).takeValue();
+ Stream.syncCopyHToD(A, X);
+ Stream.syncCopyDToH(X, B);
+ for (size_t I = 0; I < size(A); ++I)
+ EXPECT_EQ(A[I], B[I]);
+ EXPECT_FALSE(Stream.takeStatus().isError());
+}
+
+TEST_P(AcxxelTest, CopyDToD) {
+ acxxel::Platform *Platform = GetParam()().takeValue();
+ acxxel::Stream Stream = Platform->createStream().takeValue();
+ int A[] = {0, 1, 2};
+ std::array<int, size(A)> B;
+ acxxel::DeviceMemory<int> X = Platform->mallocD<int>(size(A)).takeValue();
+ acxxel::DeviceMemory<int> Y = Platform->mallocD<int>(size(A)).takeValue();
+ Stream.syncCopyHToD(A, X);
+ Stream.syncCopyDToD(X, Y);
+ Stream.syncCopyDToH(Y, B);
+ for (size_t I = 0; I < size(A); ++I)
+ EXPECT_EQ(A[I], B[I]);
+ EXPECT_FALSE(Stream.takeStatus().isError());
+}
+
+TEST_P(AcxxelTest, AsyncCopyHostAndDevice) {
+ acxxel::Platform *Platform = GetParam()().takeValue();
+ int A[] = {0, 1, 2};
+ std::array<int, size(A)> B;
+ acxxel::DeviceMemory<int> X = Platform->mallocD<int>(size(A)).takeValue();
+ acxxel::Stream Stream = Platform->createStream().takeValue();
+ acxxel::AsyncHostMemory<int> AsyncA =
+ Platform->registerHostMem(A).takeValue();
+ acxxel::AsyncHostMemory<int> AsyncB =
+ Platform->registerHostMem(B).takeValue();
+ EXPECT_FALSE(Stream.asyncCopyHToD(AsyncA, X).takeStatus().isError());
+ EXPECT_FALSE(Stream.asyncCopyDToH(X, AsyncB).takeStatus().isError());
+ EXPECT_FALSE(Stream.sync().isError());
+ for (size_t I = 0; I < size(A); ++I)
+ EXPECT_EQ(A[I], B[I]);
+}
+
+TEST_P(AcxxelTest, AsyncMemsetD) {
+ acxxel::Platform *Platform = GetParam()().takeValue();
+ constexpr size_t ArrayLength = 10;
+ std::array<uint32_t, ArrayLength> Host;
+ acxxel::DeviceMemory<uint32_t> X =
+ Platform->mallocD<uint32_t>(ArrayLength).takeValue();
+ acxxel::Stream Stream = Platform->createStream().takeValue();
+ acxxel::AsyncHostMemory<uint32_t> AsyncHost =
+ Platform->registerHostMem(Host).takeValue();
+ EXPECT_FALSE(Stream.asyncMemsetD(X, 0x12).takeStatus().isError());
+ EXPECT_FALSE(Stream.asyncCopyDToH(X, AsyncHost).takeStatus().isError());
+ EXPECT_FALSE(Stream.sync().isError());
+ for (size_t I = 0; I < ArrayLength; ++I)
+ EXPECT_EQ(0x12121212u, Host[I]);
+}
+
+TEST_P(AcxxelTest, RegisterHostMem) {
+ acxxel::Platform *Platform = GetParam()().takeValue();
+ auto Data = std::unique_ptr<int[]>(new int[3]);
+ acxxel::Expected<acxxel::AsyncHostMemory<const int>> MaybeAsyncHostMemory =
+ Platform->registerHostMem<int>({Data.get(), 3});
+ EXPECT_FALSE(MaybeAsyncHostMemory.isError())
+ << MaybeAsyncHostMemory.getError().getMessage();
+ acxxel::AsyncHostMemory<const int> AsyncHostMemory =
+ MaybeAsyncHostMemory.takeValue();
+ EXPECT_EQ(Data.get(), AsyncHostMemory.data());
+ EXPECT_EQ(3, AsyncHostMemory.size());
+}
+
+struct RefCounter {
+ static int Count;
+
+ RefCounter() { ++Count; }
+ ~RefCounter() { --Count; }
+ RefCounter(const RefCounter &) = delete;
+ RefCounter &operator=(const RefCounter &) = delete;
+};
+
+int RefCounter::Count;
+
+TEST_P(AcxxelTest, OwnedAsyncHost) {
+ acxxel::Platform *Platform = GetParam()().takeValue();
+ RefCounter::Count = 0;
+ {
+ acxxel::OwnedAsyncHostMemory<RefCounter> A =
+ Platform->newAsyncHostMem<RefCounter>(3).takeValue();
+ EXPECT_EQ(3, RefCounter::Count);
+ }
+ EXPECT_EQ(0, RefCounter::Count);
+}
+
+TEST_P(AcxxelTest, OwnedAsyncCopyHostAndDevice) {
+ acxxel::Platform *Platform = GetParam()().takeValue();
+ size_t Length = 3;
+ acxxel::OwnedAsyncHostMemory<int> A =
+ Platform->newAsyncHostMem<int>(Length).takeValue();
+ for (size_t I = 0; I < Length; ++I)
+ A[I] = I;
+ acxxel::OwnedAsyncHostMemory<int> B =
+ Platform->newAsyncHostMem<int>(Length).takeValue();
+ acxxel::DeviceMemory<int> X = Platform->mallocD<int>(Length).takeValue();
+ acxxel::Stream Stream = Platform->createStream().takeValue();
+ EXPECT_FALSE(Stream.asyncCopyHToD(A, X).takeStatus().isError());
+ EXPECT_FALSE(Stream.asyncCopyDToH(X, B).takeStatus().isError());
+ EXPECT_FALSE(Stream.sync().isError());
+ for (size_t I = 0; I < Length; ++I)
+ EXPECT_EQ(A[I], B[I]);
+}
+
+TEST_P(AcxxelTest, AsyncCopyDToD) {
+ acxxel::Platform *Platform = GetParam()().takeValue();
+ int A[] = {0, 1, 2};
+ std::array<int, size(A)> B;
+ acxxel::DeviceMemory<int> X = Platform->mallocD<int>(size(A)).takeValue();
+ acxxel::DeviceMemory<int> Y = Platform->mallocD<int>(size(A)).takeValue();
+ acxxel::Stream Stream = Platform->createStream().takeValue();
+ acxxel::AsyncHostMemory<int> AsyncA =
+ Platform->registerHostMem(A).takeValue();
+ acxxel::AsyncHostMemory<int> AsyncB =
+ Platform->registerHostMem(B).takeValue();
+ EXPECT_FALSE(Stream.asyncCopyHToD(AsyncA, X).takeStatus().isError());
+ EXPECT_FALSE(Stream.asyncCopyDToD(X, Y).takeStatus().isError());
+ EXPECT_FALSE(Stream.asyncCopyDToH(Y, AsyncB).takeStatus().isError());
+ EXPECT_FALSE(Stream.sync().isError());
+ for (size_t I = 0; I < size(A); ++I)
+ EXPECT_EQ(A[I], B[I]);
+}
+
+TEST_P(AcxxelTest, Stream) {
+ acxxel::Platform *Platform = GetParam()().takeValue();
+ acxxel::Stream Stream = Platform->createStream().takeValue();
+ EXPECT_FALSE(Stream.sync().isError());
+}
+
+TEST_P(AcxxelTest, Event) {
+ acxxel::Platform *Platform = GetParam()().takeValue();
+ acxxel::Event Event = Platform->createEvent().takeValue();
+ EXPECT_TRUE(Event.isDone());
+ EXPECT_FALSE(Event.sync().isError());
+}
+
+TEST_P(AcxxelTest, RecordEventsInAStream) {
+ acxxel::Platform *Platform = GetParam()().takeValue();
+ acxxel::Stream Stream = Platform->createStream().takeValue();
+ acxxel::Event Start = Platform->createEvent().takeValue();
+ acxxel::Event End = Platform->createEvent().takeValue();
+ EXPECT_FALSE(Stream.enqueueEvent(Start).takeStatus().isError());
+ EXPECT_FALSE(Start.sync().isError());
+ std::this_thread::sleep_for(std::chrono::milliseconds(10));
+ EXPECT_FALSE(Stream.enqueueEvent(End).takeStatus().isError());
+ EXPECT_FALSE(End.sync().isError());
+ EXPECT_GT(End.getSecondsSince(Start).takeValue(), 0);
+}
+
+TEST_P(AcxxelTest, StreamCallback) {
+ acxxel::Platform *Platform = GetParam()().takeValue();
+ int Value = 0;
+ acxxel::Stream Stream = Platform->createStream().takeValue();
+ EXPECT_FALSE(
+ Stream
+ .addCallback([&Value](acxxel::Stream &, const acxxel::Status &) {
+ Value = 42;
+ })
+ .takeStatus()
+ .isError());
+ EXPECT_FALSE(Stream.sync().isError());
+ EXPECT_EQ(42, Value);
+}
+
+TEST_P(AcxxelTest, WaitForEventsInAStream) {
+ acxxel::Platform *Platform = GetParam()().takeValue();
+ acxxel::Stream Stream0 = Platform->createStream().takeValue();
+ acxxel::Stream Stream1 = Platform->createStream().takeValue();
+ acxxel::Event Event0 = Platform->createEvent().takeValue();
+ acxxel::Event Event1 = Platform->createEvent().takeValue();
+
+ // Thread loops on Stream0 until someone sets the GoFlag, then set the
+ // MarkerFlag.
+
+ std::mutex Mutex;
+ std::condition_variable ConditionVar;
+ bool GoFlag = false;
+ bool MarkerFlag = false;
+
+ EXPECT_FALSE(Stream0
+ .addCallback([&Mutex, &ConditionVar, &GoFlag, &MarkerFlag](
+ acxxel::Stream &, const acxxel::Status &) {
+ std::unique_lock<std::mutex> Lock(Mutex);
+ ConditionVar.wait(Lock,
+ [&GoFlag] { return GoFlag == true; });
+ MarkerFlag = true;
+ })
+ .takeStatus()
+ .isError());
+
+ // Event0 can only occur after GoFlag and MarkerFlag are set.
+ EXPECT_FALSE(Stream0.enqueueEvent(Event0).takeStatus().isError());
+
+ // Use waitOnEvent to make a callback on Stream1 wait for an event on Stream0.
+ EXPECT_FALSE(Stream1.waitOnEvent(Event0).isError());
+ EXPECT_FALSE(Stream1.enqueueEvent(Event1).takeStatus().isError());
+ EXPECT_FALSE(Stream1
+ .addCallback([&Mutex, &MarkerFlag](acxxel::Stream &,
+ const acxxel::Status &) {
+ std::unique_lock<std::mutex> Lock(Mutex);
+ // This makes sure that this callback runs after the
+ // callback on Stream0.
+ EXPECT_TRUE(MarkerFlag);
+ })
+ .takeStatus()
+ .isError());
+
+ // Allow the callback on Stream0 to set MarkerFlag and finish.
+ {
+ std::unique_lock<std::mutex> Lock(Mutex);
+ GoFlag = true;
+ }
+ ConditionVar.notify_one();
+
+ // Make sure the events have finished and that Event1 did not happen before
+ // Event0.
+ EXPECT_FALSE(Event0.sync().isError());
+ EXPECT_FALSE(Event1.sync().isError());
+ EXPECT_FALSE(Stream1.sync().isError());
+}
+
+#if defined(ACXXEL_ENABLE_CUDA) || defined(ACXXEL_ENABLE_OPENCL)
+INSTANTIATE_TEST_CASE_P(BothPlatformTest, AcxxelTest,
+ ::testing::Values(
+#ifdef ACXXEL_ENABLE_CUDA
+ acxxel::getCUDAPlatform
+#ifdef ACXXEL_ENABLE_OPENCL
+ ,
+#endif
+#endif
+#ifdef ACXXEL_ENABLE_OPENCL
+ acxxel::getOpenCLPlatform
+#endif
+ ));
+#endif
+
+} // namespace
--- /dev/null
+//===--- opencl_test.cpp - Tests for OpenCL and the Acxxel API ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "acxxel.h"
+#include "gtest/gtest.h"
+
+#include <array>
+#include <cstring>
+
+namespace {
+
+static const char *SaxpyKernelSource = R"(
+__kernel void saxpyKernel(float A, __global float *X, __global float *Y, int N) {
+ int I = get_global_id(0);
+ if (I < N)
+ X[I] = A * X[I] + Y[I];
+}
+)";
+
+TEST(OpenCL, Saxpy) {
+ constexpr size_t Length = 3;
+
+ float A = 2.f;
+ std::array<float, Length> X = {{0.f, 1.f, 2.f}};
+ std::array<float, Length> Y = {{3.f, 4.f, 5.f}};
+ std::array<float, Length> Expected = {{3.f, 6.f, 9.f}};
+
+ acxxel::Platform *OpenCL = acxxel::getOpenCLPlatform().getValue();
+ acxxel::Stream Stream = OpenCL->createStream().takeValue();
+ auto DeviceX = OpenCL->mallocD<float>(Length).takeValue();
+ auto DeviceY = OpenCL->mallocD<float>(Length).takeValue();
+ Stream.syncCopyHToD(X, DeviceX);
+ Stream.syncCopyHToD(Y, DeviceY);
+ acxxel::Program Program =
+ OpenCL
+ ->createProgramFromSource(acxxel::Span<const char>(
+ SaxpyKernelSource, std::strlen(SaxpyKernelSource)))
+ .takeValue();
+ acxxel::Kernel Kernel = Program.createKernel("saxpyKernel").takeValue();
+ float *RawX = static_cast<float *>(DeviceX);
+ float *RawY = static_cast<float *>(DeviceY);
+ int IntLength = Length;
+ void *Arguments[] = {&A, &RawX, &RawY, &IntLength};
+ size_t ArgumentSizes[] = {sizeof(float), sizeof(float *), sizeof(float *),
+ sizeof(int)};
+ EXPECT_FALSE(
+ Stream.asyncKernelLaunch(Kernel, Length, Arguments, ArgumentSizes)
+ .takeStatus()
+ .isError());
+ Stream.syncCopyDToH(DeviceX, X);
+ EXPECT_FALSE(Stream.sync().isError());
+
+ EXPECT_EQ(X, Expected);
+}
+
+} // namespace
--- /dev/null
+//===--- span_test.cpp - Tests for the span class -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "span.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include <array>
+#include <vector>
+
+namespace {
+
+template <typename T, size_t N> size_t arraySize(T (&)[N]) { return N; }
+
+TEST(Span, NullConstruction) {
+ acxxel::Span<int> Span0;
+ EXPECT_EQ(nullptr, Span0.data());
+ EXPECT_EQ(0, Span0.size());
+
+ acxxel::Span<int> Span1(nullptr);
+ EXPECT_EQ(nullptr, Span1.data());
+ EXPECT_EQ(0, Span1.size());
+}
+
+TEST(Span, PtrSizeConstruction) {
+ int ZeroSize = 0;
+ acxxel::Span<int> Span0(nullptr, ZeroSize);
+ EXPECT_EQ(Span0.data(), nullptr);
+ EXPECT_EQ(Span0.size(), 0);
+
+ int Values[] = {0, 1, 2};
+ acxxel::Span<int> Span1(Values, arraySize(Values));
+ EXPECT_EQ(Span1.data(), Values);
+ EXPECT_EQ(static_cast<size_t>(Span1.size()), arraySize(Values));
+
+ acxxel::Span<int> Span2(Values, ZeroSize);
+ EXPECT_EQ(Span2.data(), Values);
+ EXPECT_EQ(Span2.size(), 0);
+}
+
+TEST(Span, PtrSizeConstruction_NegativeCount) {
+ int Values[] = {0, 1, 2};
+ EXPECT_DEATH(acxxel::Span<int> Span0(Values, -1), "terminate");
+}
+
+TEST(Span, PtrSizeConstruction_NullptrNonzeroSize) {
+ EXPECT_DEATH(acxxel::Span<int> Span0(nullptr, 1), "terminate");
+}
+
+TEST(Span, FirstLastConstruction) {
+ int Values[] = {0, 1, 2};
+
+ acxxel::Span<int> Span0(Values, Values);
+ EXPECT_EQ(Span0.data(), Values);
+ EXPECT_EQ(Span0.size(), 0);
+
+ acxxel::Span<int> Span(Values, Values + 2);
+ EXPECT_EQ(Span.data(), Values);
+ EXPECT_EQ(Span.size(), 2);
+}
+
+TEST(Span, FirstLastConstruction_LastBeforeFirst) {
+ int Values[] = {0, 1, 2};
+ EXPECT_DEATH(acxxel::Span<int> Span(Values + 2, Values), "terminate");
+}
+
+TEST(Span, ArrayConstruction) {
+ int Array[] = {0, 1, 2};
+ acxxel::Span<int> Span(Array);
+ EXPECT_EQ(Span.data(), Array);
+ EXPECT_EQ(Span.size(), 3);
+}
+
+TEST(Span, StdArrayConstruction) {
+ std::array<int, 3> Array{0, 1, 2};
+ acxxel::Span<int> Span(Array);
+ EXPECT_EQ(Span.data(), Array.data());
+ EXPECT_EQ(static_cast<size_t>(Span.size()), Array.size());
+
+ std::array<const int, 3> ConstArray{0, 1, 2};
+ acxxel::Span<const int> ConstSpan(ConstArray);
+ EXPECT_EQ(ConstSpan.data(), ConstArray.data());
+ EXPECT_EQ(static_cast<size_t>(ConstSpan.size()), ConstArray.size());
+}
+
+TEST(Span, ContainerConstruction) {
+ std::vector<int> Vector = {0, 1, 2};
+ acxxel::Span<int> Span(Vector);
+ EXPECT_EQ(Span.data(), &Vector[0]);
+ EXPECT_EQ(static_cast<size_t>(Span.size()), Vector.size());
+}
+
+TEST(Span, CopyConstruction) {
+ int Values[] = {0, 1, 2};
+ acxxel::Span<int> Span0(Values);
+ acxxel::Span<int> Span1(Span0);
+ EXPECT_EQ(Span1.data(), Values);
+ EXPECT_EQ(static_cast<size_t>(Span1.size()), arraySize(Values));
+}
+
+TEST(Span, CopyAssignment) {
+ int Values[] = {0, 1, 2};
+ acxxel::Span<int> Span0(Values);
+ acxxel::Span<int> Span1;
+ Span1 = Span0;
+ EXPECT_EQ(Span1.data(), Values);
+ EXPECT_EQ(static_cast<size_t>(Span1.size()), arraySize(Values));
+}
+
+TEST(Span, CopyConstFromNonConst) {
+ int Values[] = {0, 1, 2};
+ acxxel::Span<int> Span0(Values);
+ acxxel::Span<const int> Span1(Span0);
+ EXPECT_EQ(Span1.data(), Values);
+ EXPECT_EQ(static_cast<size_t>(Span1.size()), arraySize(Values));
+}
+
+TEST(Span, FirstMethod) {
+ int Values[] = {0, 1, 2};
+ acxxel::Span<int> Span(Values);
+ acxxel::Span<int> Span0 = Span.first(0);
+ acxxel::Span<int> Span1 = Span.first(1);
+ acxxel::Span<int> Span2 = Span.first(2);
+ acxxel::Span<int> Span3 = Span.first(3);
+
+ EXPECT_EQ(Span0.data(), Values);
+ EXPECT_EQ(Span1.data(), Values);
+ EXPECT_EQ(Span2.data(), Values);
+ EXPECT_EQ(Span3.data(), Values);
+
+ EXPECT_TRUE(Span0.empty());
+
+ EXPECT_THAT(Span1, ::testing::ElementsAre(0));
+ EXPECT_THAT(Span2, ::testing::ElementsAre(0, 1));
+ EXPECT_THAT(Span3, ::testing::ElementsAre(0, 1, 2));
+}
+
+TEST(Span, FirstMethod_IllegalArguments) {
+ int Values[] = {0, 1, 2};
+ acxxel::Span<int> Span(Values);
+
+ EXPECT_DEATH(Span.first(-1), "terminate");
+ EXPECT_DEATH(Span.first(4), "terminate");
+}
+
+TEST(Span, LastMethod) {
+ int Values[] = {0, 1, 2};
+ acxxel::Span<int> Span(Values);
+ acxxel::Span<int> Span0 = Span.last(0);
+ acxxel::Span<int> Span1 = Span.last(1);
+ acxxel::Span<int> Span2 = Span.last(2);
+ acxxel::Span<int> Span3 = Span.last(3);
+
+ EXPECT_EQ(Span0.data(), Values);
+ EXPECT_EQ(Span1.data(), Values + 2);
+ EXPECT_EQ(Span2.data(), Values + 1);
+ EXPECT_EQ(Span3.data(), Values);
+
+ EXPECT_TRUE(Span0.empty());
+
+ EXPECT_THAT(Span1, ::testing::ElementsAre(2));
+ EXPECT_THAT(Span2, ::testing::ElementsAre(1, 2));
+ EXPECT_THAT(Span3, ::testing::ElementsAre(0, 1, 2));
+}
+
+TEST(Span, LastMethod_IllegalArguments) {
+ int Values[] = {0, 1, 2};
+ acxxel::Span<int> Span(Values);
+
+ EXPECT_DEATH(Span.last(-1), "terminate");
+ EXPECT_DEATH(Span.last(4), "terminate");
+}
+
+TEST(Span, SubspanMethod) {
+ int Values[] = {0, 1, 2};
+ acxxel::Span<int> Span(Values);
+
+ acxxel::Span<int> Span0 = Span.subspan(0);
+ acxxel::Span<int> Span0e = Span.subspan(0, acxxel::dynamic_extent);
+ acxxel::Span<int> Span00 = Span.subspan(0, 0);
+ acxxel::Span<int> Span01 = Span.subspan(0, 1);
+ acxxel::Span<int> Span02 = Span.subspan(0, 2);
+ acxxel::Span<int> Span03 = Span.subspan(0, 3);
+
+ acxxel::Span<int> Span1 = Span.subspan(1);
+ acxxel::Span<int> Span1e = Span.subspan(1, acxxel::dynamic_extent);
+ acxxel::Span<int> Span10 = Span.subspan(1, 0);
+ acxxel::Span<int> Span11 = Span.subspan(1, 1);
+ acxxel::Span<int> Span12 = Span.subspan(1, 2);
+
+ acxxel::Span<int> Span2 = Span.subspan(2);
+ acxxel::Span<int> Span2e = Span.subspan(2, acxxel::dynamic_extent);
+ acxxel::Span<int> Span20 = Span.subspan(2, 0);
+ acxxel::Span<int> Span21 = Span.subspan(2, 1);
+
+ acxxel::Span<int> Span3 = Span.subspan(3);
+ acxxel::Span<int> Span3e = Span.subspan(3, acxxel::dynamic_extent);
+ acxxel::Span<int> Span30 = Span.subspan(3, 0);
+
+ EXPECT_EQ(Span0.data(), Values);
+ EXPECT_EQ(Span0e.data(), Values);
+ EXPECT_EQ(Span00.data(), Values);
+ EXPECT_EQ(Span01.data(), Values);
+ EXPECT_EQ(Span02.data(), Values);
+ EXPECT_EQ(Span03.data(), Values);
+
+ EXPECT_EQ(Span1.data(), Values + 1);
+ EXPECT_EQ(Span1e.data(), Values + 1);
+ EXPECT_EQ(Span10.data(), Values + 1);
+ EXPECT_EQ(Span11.data(), Values + 1);
+ EXPECT_EQ(Span12.data(), Values + 1);
+
+ EXPECT_EQ(Span2.data(), Values + 2);
+ EXPECT_EQ(Span2e.data(), Values + 2);
+ EXPECT_EQ(Span20.data(), Values + 2);
+ EXPECT_EQ(Span21.data(), Values + 2);
+
+ EXPECT_EQ(Span3.data(), Values + 3);
+ EXPECT_EQ(Span3e.data(), Values + 3);
+ EXPECT_EQ(Span30.data(), Values + 3);
+
+ EXPECT_TRUE(Span00.empty());
+ EXPECT_TRUE(Span10.empty());
+ EXPECT_TRUE(Span20.empty());
+ EXPECT_TRUE(Span30.empty());
+
+ EXPECT_THAT(Span0, ::testing::ElementsAre(0, 1, 2));
+ EXPECT_THAT(Span0e, ::testing::ElementsAre(0, 1, 2));
+ EXPECT_THAT(Span01, ::testing::ElementsAre(0));
+ EXPECT_THAT(Span02, ::testing::ElementsAre(0, 1));
+ EXPECT_THAT(Span03, ::testing::ElementsAre(0, 1, 2));
+
+ EXPECT_THAT(Span1, ::testing::ElementsAre(1, 2));
+ EXPECT_THAT(Span1e, ::testing::ElementsAre(1, 2));
+ EXPECT_THAT(Span11, ::testing::ElementsAre(1));
+ EXPECT_THAT(Span12, ::testing::ElementsAre(1, 2));
+
+ EXPECT_THAT(Span2, ::testing::ElementsAre(2));
+ EXPECT_THAT(Span2e, ::testing::ElementsAre(2));
+ EXPECT_THAT(Span21, ::testing::ElementsAre(2));
+
+ EXPECT_TRUE(Span3.empty());
+ EXPECT_TRUE(Span3e.empty());
+}
+
+TEST(Span, SubspanMethod_IllegalArguments) {
+ int Values[] = {0, 1, 2};
+ acxxel::Span<int> Span(Values);
+ EXPECT_DEATH(Span.subspan(-1, 0), "terminate");
+ EXPECT_DEATH(Span.subspan(0, -2), "terminate");
+ EXPECT_DEATH(Span.subspan(0, 4), "terminate");
+ EXPECT_DEATH(Span.subspan(1, 3), "terminate");
+ EXPECT_DEATH(Span.subspan(2, 2), "terminate");
+ EXPECT_DEATH(Span.subspan(3, 1), "terminate");
+ EXPECT_DEATH(Span.subspan(4, 0), "terminate");
+}
+
+TEST(Span, ElementAccess) {
+ int Values[] = {0, 1, 2};
+ acxxel::Span<int> Span(Values);
+
+ EXPECT_EQ(&Span[0], Values);
+ EXPECT_EQ(&Span[1], Values + 1);
+ EXPECT_EQ(&Span[2], Values + 2);
+ EXPECT_EQ(&Span(0), Values);
+ EXPECT_EQ(&Span(1), Values + 1);
+ EXPECT_EQ(&Span(2), Values + 2);
+
+ Span[0] = 5;
+ EXPECT_EQ(Values[0], 5);
+
+ Span(0) = 0;
+ EXPECT_EQ(Values[0], 0);
+
+ const int ConstValues[] = {0, 1, 2};
+ acxxel::Span<const int> ConstSpan(ConstValues);
+
+ EXPECT_EQ(&ConstSpan[0], ConstValues);
+ EXPECT_EQ(&ConstSpan[1], ConstValues + 1);
+ EXPECT_EQ(&ConstSpan[2], ConstValues + 2);
+ EXPECT_EQ(&ConstSpan(0), ConstValues);
+ EXPECT_EQ(&ConstSpan(1), ConstValues + 1);
+ EXPECT_EQ(&ConstSpan(2), ConstValues + 2);
+}
+
+} // namespace
--- /dev/null
+//===--- status_test.cpp - Tests for the Status and Expected classes ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "status.h"
+
+#include "gtest/gtest.h"
+
+#include <memory>
+
+namespace {
+
+struct RefCounter {
+ static int Count;
+
+ RefCounter() { ++Count; }
+ ~RefCounter() { --Count; }
+ RefCounter(const RefCounter &) = delete;
+ RefCounter &operator=(const RefCounter &) = delete;
+};
+
+int RefCounter::Count;
+
+TEST(Expected, RefCounter) {
+ RefCounter::Count = 0;
+ using uptr = std::unique_ptr<RefCounter>;
+
+ acxxel::Expected<uptr> E0(uptr(new RefCounter));
+ EXPECT_FALSE(E0.isError());
+ EXPECT_EQ(1, RefCounter::Count);
+
+ acxxel::Expected<uptr> E1(std::move(E0));
+ EXPECT_FALSE(E1.isError());
+ EXPECT_EQ(1, RefCounter::Count);
+
+ acxxel::Expected<uptr> E2(acxxel::Status("nothing in here yet"));
+ EXPECT_TRUE(E2.isError());
+ EXPECT_EQ(1, RefCounter::Count);
+ E2 = std::move(E1);
+ EXPECT_FALSE(E2.isError());
+ EXPECT_EQ(1, RefCounter::Count);
+
+ EXPECT_EQ(1, E2.getValue()->Count);
+ EXPECT_FALSE(E2.isError());
+ EXPECT_EQ(1, RefCounter::Count);
+
+ EXPECT_EQ(1, E2.takeValue()->Count);
+ EXPECT_EQ(0, RefCounter::Count);
+}
+
+} // namespace