---
-name: Bug report
+name: 🐞 Bug report
about: Create a report to help us improve
title: ''
labels: bug
--- /dev/null
+contact_links:
+ - name: 🤔 Question or discussion
+ url: https://github.com/ccache/ccache/discussions
+ about: Please go to https://github.com/ccache/ccache/discussions
---
-name: Feature request
+name: ✨ Feature request
about: Suggest a new feature for this project
title: ''
labels: feature
---
-name: Improvement
+name: 📈 Improvement
about: Suggest an improvement that is neither a bug fix nor a new feature
title: ''
labels: improvement
+++ /dev/null
----
-name: Question
-about: Ask for support or make an enquiry
-title: ''
-labels: support
-assignees: ''
-
----
-### Question ###
-<!-- What do you want help with or know about? -->
CTEST_OUTPUT_ON_FAILURE: ON
VERBOSE: 1
+defaults:
+ run:
+ shell: bash
+
jobs:
build_and_test:
env:
CMAKE_GENERATOR: Ninja
-
+
name: ${{ matrix.config.os }}-${{ matrix.config.compiler }}-${{ matrix.config.version }}
runs-on: ${{ matrix.config.os }}
strategy:
compiler: gcc
version: "10"
- # Enable after https://github.com/ccache/ccache/pull/693
- # - os: ubuntu-16.04
- # compiler: clang
- # version: "3.5"
-
- # Enable after https://github.com/ccache/ccache/pull/693
- # - os: ubuntu-16.04
- # compiler: clang
- # version: "5.0"
+ - os: ubuntu-16.04
+ compiler: clang
+ version: "5.0"
- os: ubuntu-16.04
compiler: clang
- name: Install dependencies
run: |
if [ "${{ runner.os }}" = "Linux" ]; then
+ sudo apt-get update
+
+ # Install ld.gold (binutils) and ld.lld on different runs.
+ # Binding to Ubuntu 20 has no special meaning.
if [ "${{ matrix.config.os }}" = "ubuntu-20.04" ]; then
- sudo apt-get install -y ninja-build elfutils libzstd-dev
+ sudo apt-get install -y ninja-build elfutils libzstd-dev lld
else
- sudo apt-get install -y ninja-build elfutils libzstd1-dev
+ sudo apt-get install -y ninja-build elfutils libzstd1-dev binutils
fi
if [ "${{ matrix.config.compiler }}" = "gcc" ]; then
echo "CC=clang-${{ matrix.config.version }}" >> $GITHUB_ENV
echo "CXX=clang++-${{ matrix.config.version }}" >> $GITHUB_ENV
- sudo apt update
sudo apt install -y clang-${{ matrix.config.version }} g++-multilib
fi
elif [ "${{ runner.os }}" = "macOS" ]; then
- brew install ninja
+ HOMEBREW_NO_AUTO_UPDATE=1 HOMEBREW_NO_INSTALL_CLEANUP=1 \
+ brew install ninja
+
if [ "${{ matrix.config.compiler }}" = "gcc" ]; then
brew install gcc@${{ matrix.config.version }}
echo "CC=gcc-${{ matrix.config.version }}" >> $GITHUB_ENV
if: failure()
uses: actions/upload-artifact@v2
with:
- name: ${{ matrix.config.os }}-${{ matrix.config.compiler }}-{{ matrix.config.version }}-testdir.tar.xz
+ name: ${{ matrix.config.os }}-${{ matrix.config.compiler }}-${{ matrix.config.version }}-testdir.tar.xz
path: testdir.tar.xz
specific_tests:
CC: x86_64-w64-mingw32-gcc-posix
CXX: x86_64-w64-mingw32-g++-posix
ENABLE_CACHE_CLEANUP_TESTS: 1
- CMAKE_PARAMS: -DCMAKE_BUILD_TYPE=CI -DCMAKE_SYSTEM_NAME=Windows -DZSTD_FROM_INTERNET=ON -DSTATIC_LINK=ON
+ CMAKE_PARAMS: -DCMAKE_BUILD_TYPE=CI -DCMAKE_SYSTEM_NAME=Windows -DZSTD_FROM_INTERNET=ON
RUN_TESTS: unittest-in-wine
apt_get: elfutils mingw-w64 wine
+ - name: Windows VS2019 32-bit
+ os: windows-2019
+ msvc_arch: x64_x86
+ allow_test_failures: true # For now, don't fail the build on failure
+ CC: cl
+ CXX: cl
+ ENABLE_CACHE_CLEANUP_TESTS: 1
+ CMAKE_GENERATOR: Ninja
+ CMAKE_PARAMS: -DCMAKE_BUILD_TYPE=CI -DZSTD_FROM_INTERNET=ON
+ TEST_CC: clang -target i686-pc-windows-msvc
+
+ - name: Windows VS2019 64-bit
+ os: windows-2019
+ msvc_arch: x64
+ allow_test_failures: true # For now, don't fail the build on failure
+ CC: cl
+ CXX: cl
+ ENABLE_CACHE_CLEANUP_TESTS: 1
+ CMAKE_GENERATOR: Ninja
+ CMAKE_PARAMS: -DCMAKE_BUILD_TYPE=CI -DZSTD_FROM_INTERNET=ON
+ TEST_CC: clang -target x86_64-pc-windows-msvc
+
- name: Clang address & UB sanitizer
os: ubuntu-20.04
CC: clang
- name: Run apt-get
if: matrix.config.apt_get != ''
- run: sudo apt-get install ${{ matrix.config.apt_get }}
+ run: sudo apt-get update && sudo apt-get install ${{ matrix.config.apt_get }}
+
+ - name: Prepare Windows environment (Visual Studio)
+ if: runner.os == 'Windows'
+ uses: ilammy/msvc-dev-cmd@v1.5.0
+ with:
+ arch: ${{ matrix.config.msvc_arch }}
+
+ - name: Prepare Windows environment (Clang)
+ if: runner.os == 'Windows'
+ shell: powershell
+ run: |
+ $ErrorActionPreference = 'Stop'
+
+ # The test suite currently requires that the compiler specified by the
+ # "CC" environment variable is on a path without spaces. Provide that
+ # by creating a junction from ~/opt/llvm to the Visual Studio path.
+ $null = New-Item `
+ -Path "${HOME}\opt\llvm" `
+ -ItemType Junction `
+ -Target "${env:VCINSTALLDIR}\Tools\Llvm\x64" `
+ -Force
+ "Path=${HOME}\opt\llvm\bin;${env:Path}" | `
+ Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
- name: Build and test
+ id: build-and-test
env:
ASAN_OPTIONS: ${{ matrix.config.ASAN_OPTIONS }}
BUILDDIR: ${{ matrix.config.BUILDDIR }}
CC: ${{ matrix.config.CC }}
CCACHE_LOC: ${{ matrix.config.CCACHE_LOC }}
CFLAGS: ${{ matrix.config.CFLAGS }}
+ CMAKE_GENERATOR: ${{ matrix.config.CMAKE_GENERATOR }}
CMAKE_PARAMS: ${{ matrix.config.CMAKE_PARAMS }}
CXX: ${{ matrix.config.CXX }}
CXXFLAGS: ${{ matrix.config.CXXFLAGS }}
LDFLAGS: ${{ matrix.config.LDFLAGS }}
RUN_TESTS: ${{ matrix.config.RUN_TESTS }}
SPECIAL: ${{ matrix.config.SPECIAL }}
- run: ci/build
+ TEST_CC: ${{ matrix.config.TEST_CC }}
+ run: |
+ rc=0
+ ci/build || rc=$?
+ echo "::set-output name=exit_status::$rc"
+ exit $rc
+ # CTest exits with return code 8 on test failure.
+ continue-on-error: ${{ matrix.config.allow_test_failures == true &&
+ steps.build-and-test.outputs.exit_status == 8 }}
- name: Collect testdir from failed tests
- if: failure()
+ if: failure() || steps.build-and-test.outcome == 'failure'
run: ci/collect-testdir
# TODO: in case of build-and-verify-*package the BUILDDIR is set within those scripts.
- name: Upload testdir from failed tests
- if: failure()
+ if: failure() || steps.build-and-test.outcome == 'failure'
uses: actions/upload-artifact@v2
with:
name: ${{ matrix.config.name }} - testdir.tar.xz
uses: actions/checkout@v2
- name: Install codespell
- run: sudo apt-get install codespell
+ run: sudo apt-get update && sudo apt-get install codespell
- name: Run codespell
run: codespell -q 7 -S ".git,LICENSE.adoc,./src/third_party/*" -I misc/codespell-allowlist.txt
--- /dev/null
+# More info:
+# https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning
+
+name: "CodeQL"
+
+on:
+ push:
+ branches: ["*"]
+ pull_request:
+ # The branches below must be a subset of the branches above
+ branches: ["*"]
+ paths-ignore:
+ - '**/*.adoc'
+ - '**/*.bash'
+ - '**/*.md'
+ schedule:
+ # Full scan once a week
+ - cron: '0 14 * * 3'
+
+jobs:
+ analyze:
+ name: Analyze
+ runs-on: ubuntu-18.04
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v2
+ with:
+ # We must fetch at least the immediate parents so that if this is
+ # a pull request then we can checkout the head.
+ fetch-depth: 2
+
+ - name: Install dependencies
+ run: sudo apt-get update && sudo apt-get install ninja-build elfutils libzstd1-dev
+
+ - name: Initialize CodeQL
+ uses: github/codeql-action/init@v1
+ with:
+ languages: cpp
+ queries: +security-and-quality
+
+ - name: Build
+ run: ci/build
+ env:
+ RUN_TESTS: none
+ CMAKE_GENERATOR: Ninja
+
+ - name: Perform CodeQL Analysis
+ uses: github/codeql-action/analyze@v1
Erik Flodin <erik@ejohansson.se>
Hongli Lai <hongli@phusion.nl>
Jonny Yu <yingshen.yu@gmail.com>
+Ka Ho Ng <khng300@gmail.com>
Kona Blend <kona8lend@gmail.com>
Leanid Chaika <leanid.chaika@gmail.com>
Luboš Luňák <l.lunak@centrum.cz> <l.lunak@suse.cz>
cmake_minimum_required(VERSION 3.4.3)
-project(ccache LANGUAGES C CXX ASM)
+project(ccache LANGUAGES C CXX)
+if(MSVC)
+ enable_language(ASM_MASM)
+else()
+ enable_language(ASM)
+endif()
set(CMAKE_PROJECT_DESCRIPTION "a fast C/C++ compiler cache")
if(NOT "${CMAKE_CXX_STANDARD}")
#
# Settings
#
+include(CcacheVersion)
+
+if("${CCACHE_VERSION_ORIGIN}" STREQUAL git OR DEFINED ENV{CI})
+ set(CCACHE_DEV_MODE ON)
+else()
+ set(CCACHE_DEV_MODE OFF)
+endif()
+message(STATUS "Ccache dev mode: ${CCACHE_DEV_MODE}")
+
+include(UseCcache)
+include(UseFastestLinker)
include(StandardSettings)
include(StandardWarnings)
include(CIBuildType)
include(GenerateConfigurationFile)
include(GenerateVersionFile)
-if(HAVE_SYS_MMAN_H)
+if(HAVE_SYS_MMAN_H AND HAVE_PTHREAD_MUTEXATTR_SETPSHARED)
set(INODE_CACHE_SUPPORTED 1)
endif()
option(ENABLE_TRACING "Enable possibility to use internal ccache tracing" OFF)
if(WIN32 AND CMAKE_CXX_COMPILER_ID MATCHES "GNU")
- option(STATIC_LINK "Link statically with system libraries" OFF)
+ option(STATIC_LINK "Link statically with system libraries" ON)
endif()
#
There are several options:
-1. Ask a question in the [issue
- tracker](https://github.com/ccache/ccache/issues/new/choose).
+1. Ask a question in
+ [discussions](https://github.com/ccache/ccache/discussions).
2. Post your question to the [mailing
list](https://lists.samba.org/mailman/listinfo/ccache/).
3. Chat in the [Gitter room](https://gitter.im/ccache/ccache).
avoid potentially wasting time on doing something that may need major rework
to be accepted, or maybe doesn't end up being accepted at all.
* Is your pull request "work in progress", i.e. you don't think that it's ready
- for merging yet but you want early comments and CI test results? Then create
- a draft pull request as described in [this Github blog
+ for merging yet but you want early comments and CI test results? Then create a
+ draft pull request as described in [this GitHub blog
post](https://github.blog/2019-02-14-introducing-draft-pull-requests/).
* Please follow ccache's code style (see the section below).
* Consider [A Note About Git Commit
## Code style
Ccache was written in C99 until 2019 when it started being converted to C++11.
-The conversion is a slow work in progress, which is why there is a lot of
-C-style code left. Please refrain from doing large C to C++ conversions; do it
-little by little.
+The conversion is a slow work in progress, which is why there is some C-style
+code left. Please refrain from doing large C to C++ conversions; do it little by
+little.
Source code formatting is defined by `.clang-format` in the root directory. The
format is loosely based on [LLVM's code formatting
highly recommended to install
[Clang-Format](https://clang.llvm.org/docs/ClangFormat.html) 6.0 or newer and
run `make format` to format changes according to ccache's code style. Or even
-better: set up your editor to run Clang-Format automatically when saving. If
-you don't run Clang-Format then the ccache authors have to do it for you.
+better: set up your editor to run Clang-Format automatically when saving. If you
+don't run Clang-Format then the ccache authors have to do it for you.
Please follow these conventions:
* Use `UpperCamelCase` for types (e.g. classes and structs) and namespaces.
* Use `UPPER_CASE` names for macros and (non-class) enum values.
-* Use `snake_case` for other names (functions, variables, enum class values, etc.).
+* Use `snake_case` for other names (functions, variables, enum class values,
+ etc.).
* Use an `m_` prefix for non-public member variables.
* Use a `g_` prefix for global mutable variables.
* Use a `k_` prefix for global constants.
-------------------------------------------------------------------------------
Copyright (C) 2002-2007 Andrew Tridgell
-Copyright (C) 2009-2020 Joel Rosdahl and other contributors
+Copyright (C) 2009-2021 Joel Rosdahl and other contributors
-------------------------------------------------------------------------------
under less restrictive terms.
-src/third_party/base32hex.[hc]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+src/third_party/base32hex.*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
This base32hex implementation comes from
<https://github.com/pmconrad/tinydnssec>.
-------------------------------------------------------------------------------
-src/third_party/blake3/*.[hcS]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+src/third_party/blake3/blake3_*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This is a subset of https://github.com/BLAKE3-team/BLAKE3[BLAKE3] 0.3.7 with
the following license:
~~~~~~~~~~~~~~~~~~~~~~~~~
This is the single header version of https://github.com/onqtam/doctest[doctest]
-2.4.0 with the following license:
+2.4.4 with the following license:
-------------------------------------------------------------------------------
The MIT License (MIT)
-Copyright (c) 2016-2019 Viktor Kirilov
+Copyright (c) 2016-2020 Viktor Kirilov
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
src/third_party/fmt/*.h and src/third_party/format.cpp
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-This is a subset of https://fmt.dev[fmt] 7.0.3 with the following license:
+This is a subset of https://fmt.dev[fmt] 7.1.3 with the following license:
-------------------------------------------------------------------------------
Formatting library for C++
-------------------------------------------------------------------------------
-src/third_party/getopt_long.[hc]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+src/third_party/getopt_long.*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This implementation of `getopt_long()` was copied from
https://www.postgresql.org[PostgreSQL] and has the following license text:
-------------------------------------------------------------------------------
-src/third_party/minitrace.[hc]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+src/third_party/minitrace.*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
A library for producing JSON traces suitable for Chrome's built-in trace viewer
(chrome://tracing). Downloaded from <https://github.com/hrydgard/minitrace>.
src/third_party/nonstd/optional.hpp
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-This alternative implementation of `std::optional` was downloaded from
-<https://github.com/martinmoene/optional-lite> and has the following license
-text:
+This is the single header version of
+https://github.com/martinmoene/optional-lite[optional-lite] 3.4.0 with the
+following license:
-------------------------------------------------------------------------------
+Copyright (c) 2014-2018 Martin Moene
+
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
text:
-------------------------------------------------------------------------------
+Copyright 2017-2020 by Martin Moene
+
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
-------------------------------------------------------------------------------
-src/third_party/win32/getopt.[hc]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+src/third_party/win32/getopt.*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This implementation of `getopt_long()` for Win32 was taken from
https://www.codeproject.com/Articles/157001/Full-getopt-Port-for-Unicode-and-Multibyte-Microso
https://www.gnu.org/licenses/lgpl-3.0.html.
-src/third_party/xxh(ash|_x86dispatch).[hc]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+src/third_party/win32/mktemp.*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This implementation of `mkstemp()` for Win32 was adapted from
+<https://github.com/openbsd/src/blob/99b791d14c0f1858d87a0c33b55880fb9b00be66/lib/libc/stdio/mktemp.c>
+and has the following license text:
+
+-------------------------------------------------------------------------------
+Copyright (c) 1996-1998, 2008 Theo de Raadt
+Copyright (c) 1997, 2008-2009 Todd C. Miller
+
+Permission to use, copy, modify, and distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+-------------------------------------------------------------------------------
+
+src/third_party/xxh*
+~~~~~~~~~~~~~~~~~~~~
xxHash - Extremely Fast Hash algorithm. Copied from xxHash v0.8.0 downloaded
from <https://github.com/Cyan4973/xxHash/releases>.
* [Mailing list](https://lists.samba.org/mailman/listinfo/ccache/)
* [Chat](https://gitter.im/ccache/ccache)
* [Bug report info](https://ccache.dev/bugs.html)
+* [Discussions](https://github.com/ccache/ccache/discussions)
* [Issue tracker](https://github.com/ccache/ccache/issues)
* [Help wanted!](https://github.com/ccache/ccache/labels/help%20wanted)
* [Good first issues!](https://github.com/ccache/ccache/labels/good%20first%20issue)
set -eu
+# Unset CI variable to trigger ccache user build mode.
+unset CI
+
# Ninja builds with relative paths so that ccache can be used to cache the build
# without resorting to setting base_dir.
export CMAKE_GENERATOR=Ninja
"Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel CI."
FORCE)
-string(REPLACE -DNDEBUG "" CMAKE_CXX_FLAGS_CI ${CMAKE_CXX_FLAGS_CI})
-string(REPLACE -DNDEBUG "" CMAKE_C_FLAGS_CI ${CMAKE_C_FLAGS_CI})
+string(REGEX REPLACE "[/-]DNDEBUG" "" CMAKE_CXX_FLAGS_CI ${CMAKE_CXX_FLAGS_CI})
+string(REGEX REPLACE "[/-]DNDEBUG" "" CMAKE_C_FLAGS_CI ${CMAKE_C_FLAGS_CI})
string(STRIP ${CMAKE_CXX_FLAGS_CI} CMAKE_CXX_FLAGS_CI)
string(STRIP ${CMAKE_C_FLAGS_CI} CMAKE_C_FLAGS_CI)
endif()
# From CcacheVersion.cmake.
-set(CPACK_PACKAGE_VERSION ${VERSION})
+set(CPACK_PACKAGE_VERSION ${CCACHE_VERSION})
set(CPACK_VERBATIM_VARIABLES ON)
set(
CPACK_PACKAGE_FILE_NAME
- "ccache-${VERSION}-${CMAKE_HOST_SYSTEM_NAME}-${CMAKE_HOST_SYSTEM_PROCESSOR}"
+ "ccache-${CCACHE_VERSION}-${CMAKE_HOST_SYSTEM_NAME}-${CMAKE_HOST_SYSTEM_PROCESSOR}"
)
include(CPack)
+# This script sets two variables:
+#
+# - CCACHE_VERSION (version string)
+# - CCACHE_VERSION_ORIGIN (archive or git)
+#
# There are three main scenarios:
#
# 1. Building from a source code archive generated by "git archive", e.g. the
# 3. Building from a Git repository. In this case the version will be a proper
# version if building a tagged commit, otherwise "branch.hash(+dirty)". In
# case Git is not available, the version will be "unknown".
+#
+# CCACHE_VERSION_ORIGIN is set to "archive" in scenario 1 and "git" in scenario
+# 3.
-set(version_info "897b6065398b5e80402ae1c51a60a2cefc765ed1 HEAD, tag: v4.1, origin/master, origin/HEAD, master")
+set(version_info "12ecd73fcd8aa7024d5851c1738223b8aff0c6e9 HEAD, tag: v4.2, origin/master, origin/HEAD, master")
if(version_info MATCHES "^([0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f])[0-9a-f]* (.*)")
# Scenario 1.
+ set(CCACHE_VERSION_ORIGIN archive)
+
set(hash "${CMAKE_MATCH_1}")
set(ref_names "${CMAKE_MATCH_2}")
if(ref_names MATCHES "tag: v([^,]+)")
# Tagged commit.
- set(VERSION "${CMAKE_MATCH_1}")
+ set(CCACHE_VERSION "${CMAKE_MATCH_1}")
else()
# Untagged commit.
- set(VERSION "${hash}")
+ set(CCACHE_VERSION "${hash}")
endif()
elseif(EXISTS "${CMAKE_SOURCE_DIR}/.git")
# Scenario 3.
+ set(CCACHE_VERSION_ORIGIN git)
+
find_package(Git QUIET)
if(NOT GIT_FOUND)
- set(VERSION "unknown")
+ set(CCACHE_VERSION "unknown")
message(WARNING "Could not find git")
else()
macro(git)
git(describe --abbrev=8 --dirty)
if(git_stdout MATCHES "^v([^-]+)(-dirty)?$")
- set(VERSION "${CMAKE_MATCH_1}")
+ set(CCACHE_VERSION "${CMAKE_MATCH_1}")
if(NOT "${CMAKE_MATCH_2}" STREQUAL "")
- set(VERSION "${VERSION}+dirty")
+ set(CCACHE_VERSION "${CCACHE_VERSION}+dirty")
endif()
elseif(git_stdout MATCHES "^v[^-]+-[0-9]+-g([0-9a-f]+)(-dirty)?$")
set(hash "${CMAKE_MATCH_1}")
git(rev-parse --abbrev-ref HEAD)
set(branch "${git_stdout}")
- set(VERSION "${branch}.${hash}${dirty}")
+ set(CCACHE_VERSION "${branch}.${hash}${dirty}")
endif() # else: fail below
endif()
endif()
-if(VERSION STREQUAL "")
+if(CCACHE_VERSION STREQUAL "")
# Scenario 2 or unexpected error.
message(SEND_ERROR "Cannot determine Ccache version")
endif()
+
+message(STATUS "Ccache version: ${CCACHE_VERSION}")
+++ /dev/null
-include(CMakeCheckCompilerFlagCommonPatterns)
-
-function(check_asm_compiler_flag flag var)
- if(DEFINED "${var}")
- return()
- endif()
-
- set(locale_vars LC_ALL LC_MESSAGES LANG)
- foreach(v IN LISTS locale_vars)
- set(locale_vars_saved_${v} "$ENV{${v}}")
- set(ENV{${v}} C)
- endforeach()
-
- check_compiler_flag_common_patterns(common_patterns)
-
- set(test_file "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.S")
- file(WRITE "${test_file}" ".global main\nmain:\n")
-
- if(NOT CMAKE_REQUIRED_QUIET)
- message(STATUS "Performing Test ${var}")
- endif()
- try_compile(
- ${var}
- "${CMAKE_BINARY_DIR}"
- "${test_file}"
- COMPILE_DEFINITIONS "${flag}"
- OUTPUT_VARIABLE output)
-
- check_compiler_flag_common_patterns(common_fail_patterns)
-
- foreach(regex ${common_fail_patterns})
- if("${output}" MATCHES "${regex}")
- set(${var} 0)
- endif()
- endforeach()
-
- if(${${var}})
- set(${var} 1 CACHE INTERNAL "Test ${var}")
- if(NOT CMAKE_REQUIRED_QUIET)
- message(STATUS "Performing Test ${var} - Success")
- endif()
- file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
- "Performing ASM SOURCE FILE Test ${var} succeeded with the following output:\n"
- "${output}\n"
- "Source file was:\n${test_file}\n")
- else()
- if(NOT CMAKE_REQUIRED_QUIET)
- message(STATUS "Performing Test ${var} - Failed")
- endif()
- set(${var} "" CACHE INTERNAL "Test ${var}")
- file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
- "Performing ASM SOURCE FILE Test ${var} failed with the following output:\n"
- "${output}\n"
- "Source file was:\n${test_file}\n")
- endif()
-
- foreach(v IN LISTS locale_vars)
- set(ENV{${v}} ${locale_vars_saved_${v}})
- endforeach()
-
- set(${var} "${${var}}" PARENT_SCOPE)
-endfunction()
# Default to Release for end user builds (from source archive) and Debug for
# development builds (in a Git repository).
-if(EXISTS "${CMAKE_SOURCE_DIR}/.git")
+if(CCACHE_DEV_MODE)
set(
CMAKE_BUILD_TYPE "Debug"
CACHE STRING "Choose the type of build." FORCE)
--- /dev/null
+include(CheckCXXCompilerFlag)
+
+# check_cxx_compiler_flag caches the result, so a unique variable name is
+# required for every flag to be checked.
+#
+# Parameters:
+#
+# * flag [in], e.g. FLAG
+# * var_name_of_var_name [in], e.g. "TEMP". This is the variable that "HAS_FLAG"
+# will be written to.
+function(generate_unique_has_flag_var_name flag var_name_of_var_name)
+  # Upper-case the flag first, then map every "=" and "-" to "_". The two
+  # steps do not affect each other, so e.g. "-Wformat=2" becomes "_WFORMAT_2".
+  string(TOUPPER "${flag}" sanitized_flag)
+  string(REGEX REPLACE "[=-]" "_" sanitized_flag "${sanitized_flag}")
+
+  # Hand "HAS_<sanitized flag>" back to the caller via the named output
+  # variable.
+  set(${var_name_of_var_name} "HAS_${sanitized_flag}" PARENT_SCOPE)
+endfunction()
+
+# Append `flag` to the list variable named by `varname` if the C++ compiler
+# accepts it. If it does not and `alternative_flag` is non-empty, try
+# `alternative_flag` instead (without any further fallback).
+#
+# Parameters:
+#
+# * varname [in], name of the list variable to append to
+# * flag [in], the preferred compiler flag
+# * alternative_flag [in], fallback flag, or "" for no fallback
+macro(add_compile_flag_if_supported_ex varname flag alternative_flag)
+  # has_flag will contain "HAS_$flag" so each flag gets a unique HAS variable.
+  generate_unique_has_flag_var_name("${flag}" "has_flag")
+
+  # Instead of passing "has_flag" this passes the content of has_flag.
+  check_cxx_compiler_flag("${flag}" "${has_flag}")
+
+  if(${${has_flag}})
+    list(APPEND "${varname}" "${flag}")
+  elseif(NOT "${alternative_flag}" STREQUAL "")
+    # A quoted non-constant string such as "-Wno-shadow" always evaluates to
+    # false in if(), so compare against the empty string explicitly;
+    # otherwise the fallback would never be tried.
+    add_compile_flag_if_supported_ex("${varname}" "${alternative_flag}" "")
+  endif()
+endmacro()
+
+# Convenience wrapper around add_compile_flag_if_supported_ex() that passes an
+# empty string as the alternative flag.
+macro(add_compile_flag_if_supported varname flag)
+ add_compile_flag_if_supported_ex("${varname}" "${flag}" "")
+endmacro()
+
+set(
+ _clang_gcc_warnings
+ -Wextra
+ -Wnon-virtual-dtor
+ -Wcast-align
+ -Wunused
+ -Woverloaded-virtual
+ -Wpedantic
+
+ # Candidates for enabling in the future:
+ # -Wshadow
+ # -Wold-style-cast
+ # -Wconversion
+ # -Wsign-conversion
+ # -Wnull-dereference
+ # -Wformat=2
+)
+
+# Tested separately as this is not supported by Clang 3.4.
+add_compile_flag_if_supported(_clang_gcc_warnings "-Wdouble-promotion")
+
+if(WARNINGS_AS_ERRORS)
+ list(APPEND _clang_gcc_warnings -Werror)
+endif()
+
+if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+ list(APPEND CCACHE_COMPILER_WARNINGS ${_clang_gcc_warnings})
+
+ if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.0)
+ list(
+ APPEND
+ CCACHE_COMPILER_WARNINGS
+ -Qunused-arguments
+ -Wno-error=unreachable-code
+ )
+ endif()
+
+ # If compiler supports -Wshadow-field-in-constructor, disable only that.
+ # Otherwise disable shadow.
+ add_compile_flag_if_supported_ex(
+ CCACHE_COMPILER_WARNINGS "-Wno-shadow-field-in-constructor" "-Wno-shadow")
+
+ # Disable C++20 compatibility for now.
+ add_compile_flag_if_supported(CCACHE_COMPILER_WARNINGS "-Wno-c++2a-compat")
+
+ # If compiler supports these warnings they have to be disabled for now.
+ add_compile_flag_if_supported(
+ CCACHE_COMPILER_WARNINGS "-Wno-zero-as-null-pointer-constant")
+ add_compile_flag_if_supported(
+ CCACHE_COMPILER_WARNINGS "-Wno-undefined-func-template")
+ add_compile_flag_if_supported(
+ CCACHE_COMPILER_WARNINGS "-Wno-return-std-move-in-c++11")
+elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+ list(
+ APPEND
+ CCACHE_COMPILER_WARNINGS
+ ${_clang_gcc_warnings}
+
+ # Warn about logical operations being used where bitwise were probably
+ # wanted.
+ -Wlogical-op
+
+ # Candidates for enabling in the future:
+ # -Wduplicated-cond
+ # -Wduplicated-branches
+ # -Wuseless-cast
+ )
+
+ # TODO: Exact version or reason unknown, discovered in Ubuntu 14 Docker test
+ # with GCC 4.8.4
+ if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8.5)
+ add_compile_flag_if_supported(
+ CCACHE_COMPILER_WARNINGS "-Wno-missing-field-initializers")
+ add_compile_flag_if_supported(
+ CCACHE_COMPILER_WARNINGS "-Wno-unused-variable")
+ endif()
+elseif(MSVC)
+ # Remove any warning level flags added by CMake.
+ string(REGEX REPLACE "/W[0-4]" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+ string(REGEX REPLACE "/W[0-4]" "" CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS}")
+ string(REGEX REPLACE "/W[0-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+
+ if(WARNINGS_AS_ERRORS)
+ list(APPEND CCACHE_COMPILER_WARNINGS /WX)
+ endif()
+
+ list(
+ APPEND
+ CCACHE_COMPILER_WARNINGS
+ /W4
+ # Ignore bad macro in winbase.h triggered by /Zc:preprocessor:
+ /wd5105
+ # Conversion warnings:
+ /wd4244
+ /wd4245
+ /wd4267
+ # Assignment in conditional:
+ /wd4706
+ # Non-underscore-prefixed POSIX functions:
+ /wd4996
+ )
+endif()
# Although ${zstd_FIND_VERSION} was requested, let's download a newer version.
# Note: The directory structure has changed in 1.3.0; we only support 1.3.0
# and newer.
- set(zstd_version "1.4.5")
+ set(zstd_version "1.4.8")
set(zstd_url https://github.com/facebook/zstd/archive/v${zstd_version}.tar.gz)
set(zstd_dir ${CMAKE_BINARY_DIR}/zstd-${zstd_version})
getopt_long
getpwuid
gettimeofday
- mkstemp
posix_fallocate
realpath
setenv
}
]=]
HAVE_PTHREAD_MUTEX_ROBUST)
+check_function_exists(pthread_mutexattr_setpshared HAVE_PTHREAD_MUTEXATTR_SETPSHARED)
set(CMAKE_REQUIRED_LINK_OPTIONS)
include(CheckStructHasMember)
check_struct_has_member("struct statfs" f_fstypename sys/mount.h
HAVE_STRUCT_STATFS_F_FSTYPENAME)
-include(CheckCXXCompilerFlag)
-
-# Old GCC versions don't have the required header support.
-# Old Apple Clang versions seem to support -mavx2 but not the target
-# attribute that's used to enable AVX2 for a certain function.
-if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
- OR (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8.0))
- message(STATUS "Detected unsupported compiler for HAVE_AVX2 - disabled")
- set(HAVE_AVX2 FALSE)
-else()
- check_cxx_compiler_flag(-mavx2 HAVE_AVX2)
-endif()
+include(CheckCXXSourceCompiles)
+check_cxx_source_compiles(
+ [=[
+ #include <immintrin.h>
+ void func() __attribute__((target("avx2")));
+ void func() { _mm256_abs_epi8(_mm256_set1_epi32(42)); }
+ int main()
+ {
+ func();
+ return 0;
+ }
+ ]=]
+ HAVE_AVX2)
list(APPEND CMAKE_REQUIRED_LIBRARIES ws2_32)
list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES ws2_32)
-include(CcacheVersion)
configure_file(
${CMAKE_SOURCE_DIR}/cmake/version.cpp.in
${CMAKE_BINARY_DIR}/src/version.cpp
@ONLY)
-message(STATUS "Ccache version: ${VERSION}")
# be linked privately by all product and test code, but not by third party code.
add_library(standard_warnings INTERFACE)
-if(IS_DIRECTORY "${CMAKE_SOURCE_DIR}/.git" OR DEFINED ENV{"CI"})
- # Enabled by default for development builds and CI builds.
+if(CCACHE_DEV_MODE)
+ # Enabled by default for developer builds.
option(WARNINGS_AS_ERRORS "Treat compiler warnings as errors" TRUE)
else()
- # Disabled by default for end user builds so compilation doesn't fail with new
+ # Disabled by default for user builds so compilation doesn't fail with new
# compilers that may emit new warnings.
option(WARNINGS_AS_ERRORS "Treat compiler warnings as errors" FALSE)
endif()
-include(CheckCXXCompilerFlag)
-
-# check_cxx_compiler_flag caches the result, so a unique variable name is
-# required for every flag to be checked.
-#
-# Parameters:
-#
-# * flag [in], e.g. FLAG
-# * var_name_of_var_name [in], e.g. "TEMP". This is the variable that "HAS_FLAG"
-# will be written to.
-function(generate_unique_has_flag_var_name flag var_name_of_var_name)
- string(REGEX REPLACE "[=-]" "_" var_name "${flag}")
- string(TOUPPER "${var_name}" var_name)
- set(${var_name_of_var_name} "HAS_${var_name}" PARENT_SCOPE)
-endfunction()
-
-function(add_target_compile_flag_if_supported_ex target flag alternative_flag)
- # has_flag will contain "HAS_$flag" so each flag gets a unique HAS variable.
- generate_unique_has_flag_var_name("${flag}" "has_flag")
-
- # Instead of passing "has_flag" this passes the content of has_flag.
- check_cxx_compiler_flag("${flag}" "${has_flag}")
-
- if(${${has_flag}})
- target_compile_options(${target} INTERFACE "${flag}")
- elseif("${alternative_flag}")
- add_target_compile_flag_if_supported_ex(${target} ${alternative_flag} "")
- endif()
-endfunction()
-
-# TODO: Is there a better way to provide an optional third argument?
-macro(add_target_compile_flag_if_supported target flag)
- add_target_compile_flag_if_supported_ex("${target}" "${flag}" "")
-endmacro()
-
-set(CLANG_GCC_WARNINGS
- -Wall
- -Wextra
- -Wnon-virtual-dtor
- -Wcast-align
- -Wunused
- -Woverloaded-virtual
- -Wpedantic
-
- # Candidates for enabling in the future:
- # -Wshadow
- # -Wold-style-cast
- # -Wconversion
- # -Wsign-conversion
- # -Wnull-dereference
- # -Wformat=2
-)
-# Tested separately as this is not supported by Clang 3.4.
-add_target_compile_flag_if_supported(standard_warnings "-Wdouble-promotion")
-
-if(WARNINGS_AS_ERRORS)
- set(CLANG_GCC_WARNINGS ${CLANG_GCC_WARNINGS} -Werror)
+if(NOT MSVC)
+ set(CCACHE_COMPILER_WARNINGS -Wall)
endif()
-if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
- if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.0)
- set(
- CLANG_GCC_WARNINGS
- ${CLANG_GCC_WARNINGS}
- -Qunused-arguments
- -Wno-error=unreachable-code)
- endif()
-
- target_compile_options(
- standard_warnings
- INTERFACE
- ${CLANG_GCC_WARNINGS}
- -Weverything
- -Wno-c++98-compat-pedantic
- -Wno-c++98-compat
- -Wno-constexpr-not-const
- -Wno-conversion
- -Wno-disabled-macro-expansion
- -Wno-documentation-unknown-command
- -Wno-exit-time-destructors
- -Wno-format-nonliteral
- -Wno-global-constructors
- -Wno-implicit-fallthrough
- -Wno-padded
- -Wno-shadow # Warnings in fmtlib
- -Wno-shorten-64-to-32
- -Wno-sign-conversion
- -Wno-signed-enum-bitfield # Warnings in fmtlib
- -Wno-weak-vtables
- -Wno-old-style-cast)
-
- # If compiler supports -Wshadow-field-in-constructor, disable only that.
- # Otherwise disable shadow.
- add_target_compile_flag_if_supported_ex(
- standard_warnings "-Wno-shadow-field-in-constructor" "-Wno-shadow")
-
- # Disable C++20 compatibility for now.
- add_target_compile_flag_if_supported(standard_warnings "-Wno-c++2a-compat")
-
- # If compiler supports these warnings they have to be disabled for now.
- add_target_compile_flag_if_supported(
- standard_warnings "-Wno-zero-as-null-pointer-constant")
- add_target_compile_flag_if_supported(
- standard_warnings "-Wno-undefined-func-template")
- add_target_compile_flag_if_supported(
- standard_warnings "-Wno-return-std-move-in-c++11")
-elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
- target_compile_options(
- standard_warnings
- INTERFACE ${CLANG_GCC_WARNINGS}
- # Warn about logical operations being used where bitwise were probably
- # wanted.
- -Wlogical-op
-
- # Candidates for enabling in the future:
- # -Wduplicated-cond
- # -Wduplicated-branches
- # -Wuseless-cast
- )
-
- # TODO: Exact version or reason unknown, discovered in Ubuntu 14 Docker test
- # with GCC 4.8.4
- if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8.5)
- add_target_compile_flag_if_supported(
- standard_warnings "-Wno-missing-field-initializers")
- add_target_compile_flag_if_supported(
- standard_warnings "-Wno-unused-variable")
- endif()
-elseif(MSVC)
- # Remove any warning level flags added by CMake.
- string(REGEX REPLACE "/W[0-4]" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
- string(REGEX REPLACE "/W[0-4]" "" CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS}")
- string(REGEX REPLACE "/W[0-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
-
- target_compile_options(
- standard_warnings
- INTERFACE
- /W4
- # Ignore bad macro in winbase.h triggered by /Zc:preprocessor
- /wd5105
- # Conversion warnings.
- /wd4244
- /wd4267
- # Assignment in conditional.
- /wd4706
- # Non-underscore-prefixed POSIX functions.
- /wd4996
- )
+if(CCACHE_DEV_MODE)
+ include(DevModeWarnings)
endif()
+
+target_compile_options(standard_warnings INTERFACE ${CCACHE_COMPILER_WARNINGS})
--- /dev/null
+# Note: Compiling ccache via ccache is fine because the ccache version installed
+# in the system is used.
+
+# Emits `msg` at the VERBOSE log level when the running CMake supports that
+# level (3.15 or newer); on older versions the message is silently dropped.
+# Call CMake with --log-level=VERBOSE (spelled --loglevel=VERBOSE in CMake
+# 3.15) to view verbose messages.
+#
+# Parameters:
+#
+# * msg [in]: The text to log.
+function(message_verbose msg)
+  if(NOT ${CMAKE_VERSION} VERSION_LESS "3.15")
+    # Quoted so that the text reaches message() as one argument. An unquoted
+    # expansion is split at semicolons and message() then joins the pieces
+    # without them.
+    message(VERBOSE "${msg}")
+  endif()
+endfunction()
+
+# Makes the build of the top-level project go through a ccache program located
+# with find_program(), if there is one.
+#
+# * Ninja and Makefile generators: CMAKE_<LANG>_COMPILER_LAUNCHER is set in the
+#   caller's scope for C, CXX, OBJC, OBJCXX and CUDA.
+# * Xcode generator: launcher scripts for C and CXX are written to
+#   CMAKE_BINARY_DIR and CMAKE_XCODE_ATTRIBUTE_{CC,CXX,LD,LDPLUSPLUS} are set in
+#   the caller's scope to point at them.
+# * Other generators: nothing is changed.
+function(use_ccache)
+  # Refuse to run when invoked from a directory other than the top-level one.
+  if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
+    message(WARNING "use_ccache() disabled, as it is not called from the project top level")
+    return()
+  endif()
+
+  find_program(CCACHE_PROGRAM ccache)
+  if(NOT CCACHE_PROGRAM)
+    message_verbose("Ccache program not found, not enabling ccache for faster recompilation")
+    return()
+  endif()
+
+  message_verbose("Ccache enabled for faster recompilation")
+
+  # Note: This will override any config and environment settings.
+  set(ccache_env
+    # Another option would be CMAKE_BINARY_DIR, but currently only one base
+    # directory is supported.
+    CCACHE_BASEDIR=${CMAKE_SOURCE_DIR}
+
+    # In case of very old ccache versions (pre 3.3).
+    CCACHE_CPP2=true
+  )
+
+  if(CMAKE_GENERATOR MATCHES "Ninja|Makefiles")
+    # Prefix the compiler with "<env> KEY=VALUE... <ccache>".
+    find_program(ENV_PROGRAM env)
+    if(ENV_PROGRAM)
+      set(env_program ${ENV_PROGRAM}) # faster than "cmake -E env"
+    else()
+      set(env_program ${CMAKE_COMMAND} -E env)
+    endif()
+    foreach(lang IN ITEMS C CXX OBJC OBJCXX CUDA)
+      set(CMAKE_${lang}_COMPILER_LAUNCHER
+        ${env_program} ${ccache_env} ${CCACHE_PROGRAM}
+        PARENT_SCOPE)
+    endforeach()
+  elseif(CMAKE_GENERATOR STREQUAL Xcode)
+    # Generate one bash wrapper per language that exports the environment and
+    # then execs ccache with the real compiler.
+    foreach(lang IN ITEMS C CXX)
+      set(launcher ${CMAKE_BINARY_DIR}/launch-${lang})
+      file(WRITE ${launcher} "#!/bin/bash\n\n")
+      foreach(key_val IN LISTS ccache_env)
+        # Quote the assignment so that a value containing whitespace (e.g. a
+        # source directory path with spaces) is exported as a single word.
+        file(APPEND ${launcher} "export \"${key_val}\"\n")
+      endforeach()
+      file(APPEND ${launcher}
+        "exec \"${CCACHE_PROGRAM}\" \"${CMAKE_${lang}_COMPILER}\" \"$@\"\n")
+      execute_process(COMMAND chmod a+rx ${launcher})
+    endforeach()
+    set(CMAKE_XCODE_ATTRIBUTE_CC ${CMAKE_BINARY_DIR}/launch-C PARENT_SCOPE)
+    set(CMAKE_XCODE_ATTRIBUTE_CXX ${CMAKE_BINARY_DIR}/launch-CXX PARENT_SCOPE)
+    set(CMAKE_XCODE_ATTRIBUTE_LD ${CMAKE_BINARY_DIR}/launch-C PARENT_SCOPE)
+    set(CMAKE_XCODE_ATTRIBUTE_LDPLUSPLUS ${CMAKE_BINARY_DIR}/launch-CXX PARENT_SCOPE)
+  endif()
+endfunction()
+
+# Enabled by default; configure with -DUSE_CCACHE=FALSE to skip use_ccache().
+option(USE_CCACHE "Use ccache to speed up recompilation time" TRUE)
+if(USE_CCACHE)
+ use_ccache()
+endif()
--- /dev/null
+# Emits `msg` at the VERBOSE log level when the running CMake supports that
+# level (3.15 or newer); on older versions the message is silently dropped.
+# Call CMake with --log-level=VERBOSE (spelled --loglevel=VERBOSE in CMake
+# 3.15) to view those messages.
+#
+# Parameters:
+#
+# * msg [in]: The text to log.
+function(message_verbose msg)
+  if(NOT ${CMAKE_VERSION} VERSION_LESS "3.15")
+    # Quoted so that the text reaches message() as one argument. An unquoted
+    # expansion is split at semicolons and message() then joins the pieces
+    # without them.
+    message(VERBOSE "${msg}")
+  endif()
+endfunction()
+
+# Looks for ld.lld and, failing that, ld.gold. If one of them is found, a
+# CMAKE_CXX_LINK_EXECUTABLE rule that invokes it directly is set up; otherwise
+# nothing is changed. The outcome is reported via message_verbose().
+function(use_fastest_linker)
+ # Refuse to run when invoked from a directory other than the top-level one.
+ if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
+ message(WARNING "use_fastest_linker() disabled, as it is not called at the project top level")
+ return()
+ endif()
+
+ # Prefer lld; only search for gold when lld was not found.
+ find_program(FASTER_LINKER ld.lld)
+ if(NOT FASTER_LINKER)
+ find_program(FASTER_LINKER ld.gold)
+ endif()
+
+ if(FASTER_LINKER)
+ # Note: Compiler flag -fuse-ld requires gcc 9 or clang 3.8.
+ # Instead override CMAKE_CXX_LINK_EXECUTABLE directly.
+ # By default CMake uses the compiler executable for linking.
+ #
+ # NOTE(review): this set() uses neither PARENT_SCOPE nor CACHE, so the value
+ # appears to be local to this function's scope -- confirm that the override
+ # actually reaches the targets being linked.
+ set(CMAKE_CXX_LINK_EXECUTABLE "${FASTER_LINKER} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
+ message_verbose("Using ${FASTER_LINKER} linker for faster linking")
+ else()
+ message_verbose("Using default linker")
+ endif()
+endfunction()
+
+# Enabled by default; configure with -DUSE_FASTER_LINKER=FALSE to skip
+# use_fastest_linker().
+option(USE_FASTER_LINKER "Use the lld or gold linker instead of the default for faster linking" TRUE)
+if(USE_FASTER_LINKER)
+ use_fastest_linker()
+endif()
// features on Mac OS X, so we need _DARWIN_C_SOURCE to re-enable them.
#cmakedefine _DARWIN_C_SOURCE
-// Define to activate features from IEEE Stds 1003.1-2001.
+// Define to activate features from IEEE Stds 1003.1-2008.
#define _POSIX_C_SOURCE 200809L
#if defined(__SunOS_5_8) || defined(__SunOS_5_9) || defined(__SunOS_5_10)
# define _XOPEN_SOURCE 500
+#elif defined(__FreeBSD__)
+# define _XOPEN_SOURCE 700
#elif !defined(__SunOS_5_11) && !defined(__APPLE__)
# define _XOPEN_SOURCE
#endif
// Define if the system has the type "long long".
#cmakedefine HAVE_LONG_LONG
-// Define if you have the "mkstemp" function.
-#cmakedefine HAVE_MKSTEMP
-
// Define if you have the "posix_fallocate" function.
#cmakedefine HAVE_POSIX_FALLOCATE
+// Define if you have the "pthread_mutexattr_setpshared" function.
+#cmakedefine HAVE_PTHREAD_MUTEXATTR_SETPSHARED
+
// Define if you have the <pwd.h> header file.
#cmakedefine HAVE_PWD_H
extern const char CCACHE_VERSION[];
-const char CCACHE_VERSION[] = "@VERSION@";
+const char CCACHE_VERSION[] = "@CCACHE_VERSION@";
* Andrew Stubbs
* Andrew Tridgell
* Arne Hasselbring
+* Azat Khuzhin
* Bernhard Bauer
* Björn Jacke
* Breno Guimaraes
* Jørgen P. Tjernø
* Josh Soref
* Justin Lebar
+* Ka Ho Ng
* Karl Chen
+* Khem Raj
* Kona Blend
* Kovarththanan Rajaratnam
* Lalit Chhabra
* Mizuha Himuraki
* Mostyn Bramley-Moore
* Neil Mushell
+* Nicholas Hutchinson
* Nick Schultz
* Norbert Lange
* Oded Shimon
* Ryan Brown
* Ryan Egesdahl
* Sam Gross
+* Sergei Trofimovich
* Steffen Dettmer
+* Stuart Henderson
* Sumit Jamgade
* Thomas Otto
* Thomas Röfer
mark_as_advanced(ASCIIDOC_EXE) # Don't show in CMake UIs
if(NOT ASCIIDOC_EXE)
- message(WARNING "Could not find asciidoc; documentation will not be generated")
+ message(NOTICE "Could not find asciidoc; documentation will not be generated")
else()
#
# HTML documentation
COMMAND
${ASCIIDOC_EXE}
-o "${html_file}"
- -a revnumber="${VERSION}"
+ -a revnumber="${CCACHE_VERSION}"
-a toc
-b xhtml11
"${CMAKE_SOURCE_DIR}/${adoc_file}"
COMMAND
${ASCIIDOC_EXE}
-o -
- -a revnumber=${VERSION}
+ -a revnumber=${CCACHE_VERSION}
-d manpage
-b docbook "${CMAKE_SOURCE_DIR}/doc/MANUAL.adoc"
| perl -pe 's!<literal>\(.*?\)</literal>!<emphasis role="strong">\\1</emphasis>!g'
COMMAND ${A2X_EXE} --doctype manpage --format manpage MANUAL.xml
MAIN_DEPENDENCY MANUAL.xml
)
- add_custom_target(doc-man-page ALL DEPENDS ccache.1)
+ add_custom_target(doc-man-page DEPENDS ccache.1)
install(
FILES "${CMAKE_CURRENT_BINARY_DIR}/ccache.1"
DESTINATION "${CMAKE_INSTALL_MANDIR}/man1"
set(doc_files "${doc_files}" ccache.1)
endif()
- add_custom_target(doc DEPENDS "${doc_files}")
+ add_custom_target(doc ALL DEPENDS "${doc_files}")
endif()
from the Internet and unpack it in the local binary tree. Ccache will
then be linked statically to the locally built libzstd.
+ To link libzstd statically you can use `-DZSTD_LIBRARY=/path/to/libzstd.a`.
+
Optional:
- GNU Bourne Again SHell (bash) for tests.
WARNING: The technique of letting ccache masquerade as the compiler works well,
but currently doesn't interact well with other tools that do the same thing.
-See <<_using_ccache_with_other_compiler_wrappers,USING CCACHE WITH OTHER
-COMPILER WRAPPERS>>.
+See _<<_using_ccache_with_other_compiler_wrappers,Using ccache with other
+compiler wrappers>>_.
WARNING: Use symbolic links for masquerading, not hard links.
*`-X`* _LEVEL_, *`--recompress`* _LEVEL_::
- Recompress the cache using compression level _LEVEL_. The level can be an
- integer, with the same semantics as the
- <<config_compression_level,*compression_level*>> configuration setting), or
+ Recompress the cache to level _LEVEL_ using the Zstandard algorithm. The
+ level can be an integer, with the same semantics as the
+ <<config_compression_level,*compression_level*>> configuration option, or
the special value *uncompressed* for no compression. See
- <<_cache_compression,CACHE COMPRESSION>> for more information. This can
+ _<<_cache_compression,Cache compression>>_ for more information. This can
potentially take a long time since all files in the cache need to be
visited. Only files that are currently compressed with a different level
than _LEVEL_ will be recompressed.
*`-o`* _KEY=VALUE_, *`--set-config`* _KEY_=_VALUE_::
Set configuration option _KEY_ to _VALUE_. See
- <<_configuration,CONFIGURATION>> for more information.
+ _<<_configuration,Configuration>>_ for more information.
*`-x`*, *`--show-compression`*::
- Print cache compression statistics. See <<_cache_compression,CACHE
- COMPRESSION>> for more information. This can potentionally take a long time
- since all files in the cache need to be visited.
+ Print cache compression statistics. See _<<_cache_compression,Cache
+ compression>>_ for more information. This can potentially take a long
+ time since all files in the cache need to be visited.
*`-p`*, *`--show-config`*::
*`-k`* _KEY_, *`--get-config`* _KEY_::
Print the value of configuration option _KEY_. See
- <<_configuration,CONFIGURATION>> for more information.
+ _<<_configuration,Configuration>>_ for more information.
*`--hash-file`* _PATH_::
*`--print-stats`*::
- Print statistics counter IDs and corresponding values machine-parsable
+ Print statistics counter IDs and corresponding values in machine-parsable
(tab-separated) format.
Configuration
-------------
-ccache's default behavior can be overridden by settings in configuration files,
+ccache's default behavior can be overridden by options in configuration files,
which in turn can be overridden by environment variables with names starting
with *CCACHE_*. Ccache normally reads configuration from two files: first a
system-level configuration file and secondly a cache-specific configuration
-file. The priority of configuration settings is as follows (where 1 is
+file. The priorities of configuration options are as follows (where 1 is
highest):
1. Environment variables.
this:
1. If *CCACHE_CONFIGPATH* is set, use that path.
-2. Otherwise, if <<config_cache_dir,*cache_dir*>> (*CCACHE_DIR*) is set then
- use *<ccache_dir>/ccache.conf*.
-3. Otherwise, if there is a legacy *$HOME/.ccache* directory then use
+2. Otherwise, if the environment variable *CCACHE_DIR* is set then use
+ *$CCACHE_DIR/ccache.conf*.
+3. Otherwise, if <<config_cache_dir,*cache_dir*>> is set in the secondary
+ (system-wide) configuration file then use *<cache_dir>/ccache.conf*.
+4. Otherwise, if there is a legacy *$HOME/.ccache* directory then use
*$HOME/.ccache/ccache.conf*.
-4. Otherwise, if *XDG_CONFIG_HOME* is set then use
+5. Otherwise, if *XDG_CONFIG_HOME* is set then use
*$XDG_CONFIG_HOME/ccache/ccache.conf*.
-5. Otherwise, use *%APPDATA%/ccache/ccache.conf* (Windows),
+6. Otherwise, use *%APPDATA%/ccache/ccache.conf* (Windows),
*$HOME/Library/Preferences/ccache/ccache.conf* (macOS) or
*$HOME/.config/ccache/ccache.conf* (other systems).
Configuration file syntax
~~~~~~~~~~~~~~~~~~~~~~~~~
-Configuration files are in a simple ``key = value'' format, one setting per
+Configuration files are in a simple ``key = value'' format, one option per
line. Lines starting with a hash sign are comments. Blank lines are ignored, as
is whitespace surrounding keys and values. Example:
directory, but only absolute paths that begin with *base_dir*. Cache
results can then be shared for compilations in different directories even
if the project uses absolute paths in the compiler command line. See also
- the discussion under <<_compiling_in_different_directories,COMPILING IN
- DIFFERENT DIRECTORIES>>. If set to the empty string (which is the default),
- no rewriting is done.
+ the discussion under _<<_compiling_in_different_directories,Compiling in
+ different directories>>_. If set to the empty string (which is the
+ default), no rewriting is done.
+
A typical path to use as *base_dir* is your home directory or another directory
that is a parent of your project directories. Don't use `/` as the base
[[config_cache_dir]] *cache_dir* (*CCACHE_DIR*)::
This option specifies where ccache will keep its cached compiler outputs.
- It will only take effect if set in the system-wide configuration file or as
- an environment variable. The default is *$XDG_CACHE_HOME/ccache* if
- *XDG_CACHE_HOME* is set, otherwise *$HOME/.cache/ccache*. Exception: If the
- legacy directory *$HOME/.ccache* exists then that directory is the default.
-
- See also <<_location_of_the_primary_configuration_file,LOCATION OF THE
- PRIMARY CONFIGURATION FILE>>.
-
- If you want to use another *CCACHE_DIR* value temporarily for one ccache
- invocation you can use the `-d/--directory` command line option instead.
+ The default is *$XDG_CACHE_HOME/ccache* if *XDG_CACHE_HOME* is set,
+ otherwise *$HOME/.cache/ccache*. Exception: If the legacy directory
+ *$HOME/.ccache* exists then that directory is the default.
++
+See also _<<_location_of_the_primary_configuration_file,Location of the primary
+configuration file>>_.
++
+If you want to use another *CCACHE_DIR* value temporarily for one ccache
+invocation you can use the `-d/--directory` command line option instead.
[[config_compiler]] *compiler* (*CCACHE_COMPILER* or (deprecated) *CCACHE_CC*)::
don't affect code generation). You should only use *none* if you know what
you are doing.
*string:value*::
- Use *value* as the string to calculate hash from. This can be the compiler
- revision number you retrieved earlier and set here via environment variable.
+ Hash *value*. This can for instance be a compiler revision number or
+ another string that the build system generates to identify the compiler.
_a command string_::
Hash the standard output and standard error output of the specified
command. The string will be split on whitespace to find out the command and
method will hash the mtime and size of the other compiler wrapper, which means
that ccache won't be able to detect a compiler upgrade. Using a suitable
command to identify the compiler is thus safer, but it's also slower, so you
-should consider continue using the *mtime* method in combination with
-the *prefix_command* option if possible. See
-<<_using_ccache_with_other_compiler_wrappers,USING CCACHE WITH OTHER COMPILER
-WRAPPERS>>.
+should consider continuing to use the *mtime* method in combination with the
+*prefix_command* option if possible. See
+_<<_using_ccache_with_other_compiler_wrappers,Using ccache with other compiler
+wrappers>>_.
--
--
distcc's "pump" script.
--
-[[config_compression]] *compression* (*CCACHE_COMPRESS* or *CCACHE_NOCOMPRESS*, see <<_boolean_values,Boolean values>> above)::
+[[config_compression]] *compression* (*CCACHE_COMPRESS* or *CCACHE_NOCOMPRESS*, see _<<_boolean_values,Boolean values>>_ above)::
If true, ccache will compress data it puts in the cache. However, this
option has no effect on how files are retrieved from the cache; compressed
A positive value corresponds to normal Zstandard compression levels. Lower
levels (e.g. *1*) mean faster compression but worse compression ratio.
Higher levels (e.g. *19*) mean slower compression but better compression
- ratio. The maximum possible value depends on the libzstd version.
- Decompression speed is essentially the same for all levels.
+ ratio. The maximum possible value depends on the libzstd version, but
+ levels up to at least 19 are available in all versions. Decompression speed is
+ essentially the same for all levels. As a rule of thumb, use level 5 or
+ lower since higher levels may slow down compilations noticeably. Higher
+ levels are however useful when recompressing the cache with command line
+ option *-X/--recompress*.
*< 0*::
- A negative value corresponds to Zstandard's “ultra-fast” compression
+ A negative value corresponds to Zstandard's ``ultra-fast'' compression
levels, which are even faster than level 1 but with less good compression
- ratios. For instance, level *-3* corresponds to “--fast=3” for the *zstd*
- command line tool.
+ ratios. For instance, level *-3* corresponds to ``--fast=3'' for the *zstd*
+ command line tool. In practice, there is little use for levels lower than
+ *-5* or so.
*0* (default)::
The value *0* means that ccache will choose a suitable level, currently
*1*.
--
++
+See the http://zstd.net[Zstandard documentation] for more information.
[[config_cpp_extension]] *cpp_extension* (*CCACHE_EXTENSION*)::
compiled, but that sometimes doesn't work. For example, when using the
``aCC'' compiler on HP-UX, set the cpp extension to *i*.
-[[config_debug]] *debug* (*CCACHE_DEBUG* or *CCACHE_NODEBUG*, see <<_boolean_values,Boolean values>> above)::
+[[config_debug]] *debug* (*CCACHE_DEBUG* or *CCACHE_NODEBUG*, see _<<_boolean_values,Boolean values>>_ above)::
If true, enable the debug mode. The debug mode creates per-object debug
files that are helpful when debugging unexpected cache misses. Note however
that ccache performance will be reduced slightly. See
- <<_cache_debugging,debugging>> for more information. The default is false.
+ _<<_cache_debugging,Cache debugging>>_ for more information. The default is
+ false.
-[[config_depend_mode]] *depend_mode* (*CCACHE_DEPEND* or *CCACHE_NODEPEND*, see <<_boolean_values,Boolean values>> above)::
+[[config_debug_dir]] *debug_dir* (*CCACHE_DEBUGDIR*)::
+
+ Specifies where to write per-object debug files if the _<<config_debug,debug
+ mode>>_ is enabled. If set to the empty string, the files will be written
+ next to the object file. If set to a directory, the debug files will be
+ written with full absolute paths in that directory, creating it if needed.
+ The default is the empty string.
+
+ For example, if *debug_dir* is set to `/example`, the current working
+ directory is `/home/user` and the object file is `build/output.o` then the
+ debug log will be written to `/example/home/user/build/output.o.ccache-log`.
+ See also _<<_cache_debugging,Cache debugging>>_.
+
+[[config_depend_mode]] *depend_mode* (*CCACHE_DEPEND* or *CCACHE_NODEPEND*, see _<<_boolean_values,Boolean values>>_ above)::
If true, the depend mode will be used. The default is false. See
- <<_the_depend_mode,THE DEPEND MODE>>.
+ _<<_the_depend_mode,The depend mode>>_.
-[[config_direct_mode]] *direct_mode* (*CCACHE_DIRECT* or *CCACHE_NODIRECT*, see <<_boolean_values,Boolean values>> above)::
+[[config_direct_mode]] *direct_mode* (*CCACHE_DIRECT* or *CCACHE_NODIRECT*, see _<<_boolean_values,Boolean values>>_ above)::
If true, the direct mode will be used. The default is true. See
- <<_the_direct_mode,THE DIRECT MODE>>.
+ _<<_the_direct_mode,The direct mode>>_.
-[[config_disable]] *disable* (*CCACHE_DISABLE* or *CCACHE_NODISABLE*, see <<_boolean_values,Boolean values>> above)::
+[[config_disable]] *disable* (*CCACHE_DISABLE* or *CCACHE_NODISABLE*, see _<<_boolean_values,Boolean values>>_ above)::
When true, ccache will just call the real compiler, bypassing the cache
completely. The default is false.
hash sum that identifies the build. The list separator is semicolon on
Windows systems and colon on other systems.
-[[config_file_clone]] *file_clone* (*CCACHE_FILECLONE* or *CCACHE_NOFILECLONE*, see <<_boolean_values,Boolean values>> above)::
+[[config_file_clone]] *file_clone* (*CCACHE_FILECLONE* or *CCACHE_NOFILECLONE*, see _<<_boolean_values,Boolean values>>_ above)::
- If true, ccache will attempt to use file cloning (also known as “copy on
- write”, “CoW” or “reflinks”) to store and fetch cached compiler results.
+ If true, ccache will attempt to use file cloning (also known as ``copy on
+ write'', ``CoW'' or ``reflinks'') to store and fetch cached compiler results.
*file_clone* has priority over <<config_hard_link,*hard_link*>>. The
default is false.
+
systems, ccache will fall back to use plain copying (or hard links if
<<config_hard_link,*hard_link*>> is enabled).
-[[config_hard_link]] *hard_link* (*CCACHE_HARDLINK* or *CCACHE_NOHARDLINK*, see <<_boolean_values,Boolean values>> above)::
+[[config_hard_link]] *hard_link* (*CCACHE_HARDLINK* or *CCACHE_NOHARDLINK*, see _<<_boolean_values,Boolean values>>_ above)::
If true, ccache will attempt to use hard links to store and fetch cached
object files. The default is false.
*file.o* in build tree A as well. This can retrigger relinking in build tree
A even though nothing really has changed.
-[[config_hash_dir]] *hash_dir* (*CCACHE_HASHDIR* or *CCACHE_NOHASHDIR*, see <<_boolean_values,Boolean values>> above)::
+[[config_hash_dir]] *hash_dir* (*CCACHE_HASHDIR* or *CCACHE_NOHASHDIR*, see _<<_boolean_values,Boolean values>>_ above)::
If true (which is the default), ccache will include the current working
directory (CWD) in the hash that is used to distinguish two compilations
Exception: The CWD will not be included in the hash if
<<config_base_dir,*base_dir*>> is set (and matches the CWD) and the
compiler option *-fdebug-prefix-map* is used. See also the discussion under
- <<_compiling_in_different_directories,COMPILING IN DIFFERENT DIRECTORIES>>.
+ _<<_compiling_in_different_directories,Compiling in different
+ directories>>_.
+
The reason for including the CWD in the hash by default is to prevent a problem
with the storage of the current working directory in the debug info of an
example, `-fmessage-length=*` will match both `-fmessage-length=20` and
`-fmessage-length=70`.
-[[config_inode_cache]] *inode_cache* (*CCACHE_INODECACHE* or *CCACHE_NOINODECACHE*, see <<_boolean_values,Boolean values>> above)::
+[[config_inode_cache]] *inode_cache* (*CCACHE_INODECACHE* or *CCACHE_NOINODECACHE*, see _<<_boolean_values,Boolean values>>_ above)::
If true, enables caching of source file hashes based on device, inode and
timestamps. This will reduce the time spent on hashing included files as
+
The feature requires *temporary_dir* to be located on a local filesystem.
-[[config_keep_comments_cpp]] *keep_comments_cpp* (*CCACHE_COMMENTS* or *CCACHE_NOCOMMENTS*, see <<_boolean_values,Boolean values>> above)::
+[[config_keep_comments_cpp]] *keep_comments_cpp* (*CCACHE_COMMENTS* or *CCACHE_NOCOMMENTS*, see _<<_boolean_values,Boolean values>>_ above)::
If true, ccache will not discard the comments before hashing preprocessor
output. This can be used to check documentation with *-Wdocumentation*.
Sets the limit when cleaning up. Files are deleted (in LRU order) until the
levels are below the limit. The default is 0.8 (= 80%). See
- <<_automatic_cleanup,AUTOMATIC CLEANUP>> for more information.
+ _<<_automatic_cleanup,Automatic cleanup>>_ for more information.
[[config_log_file]] *log_file* (*CCACHE_LOGFILE*)::
This option specifies the maximum number of files to keep in the cache. Use
0 for no limit (which is the default). See also
- <<_cache_size_management,CACHE SIZE MANAGEMENT>>.
+ _<<_cache_size_management,Cache size management>>_.
[[config_max_size]] *max_size* (*CCACHE_MAXSIZE*)::
This option specifies the maximum size of the cache. Use 0 for no limit.
The default value is 5G. Available suffixes: k, M, G, T (decimal) and Ki,
Mi, Gi, Ti (binary). The default suffix is G. See also
- <<_cache_size_management,CACHE SIZE MANAGEMENT>>.
+ _<<_cache_size_management,Cache size management>>_.
[[config_path]] *path* (*CCACHE_PATH*)::
matching the compiler name in the normal *PATH* that isn't a symbolic link
to ccache itself.
-[[config_pch_external_checksum]] *pch_external_checksum* (*CCACHE_PCH_EXTSUM* or *CCACHE_NOPCH_EXTSUM*, see <<_boolean_values,Boolean values>> above)::
+[[config_pch_external_checksum]] *pch_external_checksum* (*CCACHE_PCH_EXTSUM* or *CCACHE_NOPCH_EXTSUM*, see _<<_boolean_values,Boolean values>>_ above)::
When this option is set, and ccache finds a precompiled header file,
ccache will look for a file with the extension ``.sum'' added
This option adds a list of prefixes (separated by space) to the command
line that ccache uses when invoking the compiler. See also
- <<_using_ccache_with_other_compiler_wrappers,USING CCACHE WITH OTHER
- COMPILER WRAPPERS>>.
+ _<<_using_ccache_with_other_compiler_wrappers,Using ccache with other
+ compiler wrappers>>_.
[[config_prefix_command_cpp]] *prefix_command_cpp* (*CCACHE_PREFIX_CPP*)::
This option adds a list of prefixes (separated by space) to the command
line that ccache uses when invoking the preprocessor.
-[[config_read_only]] *read_only* (*CCACHE_READONLY* or *CCACHE_NOREADONLY*, see <<_boolean_values,Boolean values>> above)::
+[[config_read_only]] *read_only* (*CCACHE_READONLY* or *CCACHE_NOREADONLY*, see _<<_boolean_values,Boolean values>>_ above)::
If true, ccache will attempt to use existing cached results, but it will not
add new results to the cache. Statistics counters will still be updated,
temporary files otherwise. You may also want to set <<config_stats,*stats*>> to
*false* to make ccache not even try to update stats files.
-[[config_read_only_direct]] *read_only_direct* (*CCACHE_READONLY_DIRECT* or *CCACHE_NOREADONLY_DIRECT*, see <<_boolean_values,Boolean values>> above)::
+[[config_read_only_direct]] *read_only_direct* (*CCACHE_READONLY_DIRECT* or *CCACHE_NOREADONLY_DIRECT*, see _<<_boolean_values,Boolean values>>_ above)::
Just like <<config_read_only,*read_only*>> except that ccache will only try
to retrieve results from the cache using the direct mode, not the
preprocessor mode. See documentation for <<config_read_only,*read_only*>>
regarding using a read-only ccache directory.
-[[config_recache]] *recache* (*CCACHE_RECACHE* or *CCACHE_NORECACHE*, see <<_boolean_values,Boolean values>> above)::
+[[config_recache]] *recache* (*CCACHE_RECACHE* or *CCACHE_NORECACHE*, see _<<_boolean_values,Boolean values>>_ above)::
If true, ccache will not use any previously stored result. New results will
still be cached, possibly overwriting any pre-existing results.
-[[config_run_second_cpp]] *run_second_cpp* (*CCACHE_CPP2* or *CCACHE_NOCPP2*, see <<_boolean_values,Boolean values>> above)::
+[[config_run_second_cpp]] *run_second_cpp* (*CCACHE_CPP2* or *CCACHE_NOCPP2*, see _<<_boolean_values,Boolean values>>_ above)::
If true, ccache will first run the preprocessor to preprocess the source
- code (see <<_the_preprocessor_mode,THE PREPROCESSOR MODE>>) and then on a
+ code (see _<<_the_preprocessor_mode,The preprocessor mode>>_) and then on a
cache miss run the compiler on the source code to get hold of the object
file. This is the default.
+
Ignore ctimes when *file_stat_matches* is enabled. This can be useful when
backdating files' mtimes in a controlled way.
*include_file_ctime*::
- By default, ccache will not cache a file if it includes a header whose
- ctime is too new. This sloppiness disables that check.
+ By default, ccache will not cache a file if it includes a header whose ctime
+ is too new. This sloppiness disables that check. See also
+ _<<_handling_of_newly_created_header_files,Handling of newly created header
+ files>>_.
*include_file_mtime*::
By default, ccache will not cache a file if it includes a header whose
- mtime is too new. This sloppiness disables that check.
+ mtime is too new. This sloppiness disables that check. See also
+ _<<_handling_of_newly_created_header_files,Handling of newly created header
+ files>>_.
*locale*::
Ccache includes the environment variables *LANG*, *LC_ALL*, *LC_CTYPE* and
*LC_MESSAGES* in the hash by default since they may affect localization of
that.
*pch_defines*::
Be sloppy about **#define**s when precompiling a header file. See
- <<_precompiled_headers,PRECOMPILED HEADERS>> for more information.
+ _<<_precompiled_headers,Precompiled headers>>_ for more information.
*modules*::
By default, ccache will not cache compilations if *-fmodules* is used since
it cannot hash the state of compiler's internal representation of relevant
modules. This sloppiness allows caching in such a case. See
- <<_c_modules,C++ MODULES>> for more information.
+ _<<_c_modules,C++ modules>>_ for more information.
*system_headers*::
By default, ccache will also include all system headers in the manifest.
With this sloppiness set, ccache will only include system headers in the
source code.
--
+
-See the discussion under <<_troubleshooting,TROUBLESHOOTING>> for more
+See the discussion under _<<_troubleshooting,Troubleshooting>>_ for more
information.
-[[config_stats]] *stats* (*CCACHE_STATS* or *CCACHE_NOSTATS*, see <<_boolean_values,Boolean values>> above)::
+[[config_stats]] *stats* (*CCACHE_STATS* or *CCACHE_NOSTATS*, see _<<_boolean_values,Boolean values>>_ above)::
If true, ccache will update the statistics counters on each compilation.
The default is true.
-----------------
Ccache will by default compress all data it puts into the cache using the
-compression algorithm Zstandard (zstd) using compression level 1. The algorithm
-is fast enough that there should be little reason to turn off compression to
-gain performance. One exception is if the cache is located on a compressed file
-system, in which case the compression performed by ccache of course is
-redundant. See the documentation for the configuration options
+compression algorithm http://zstd.net[Zstandard] (zstd) using compression level
+1. The algorithm is fast enough that there should be little reason to turn off
+compression to gain performance. One exception is if the cache is located on a
+compressed file system, in which case the compression performed by ccache of
+course is redundant. See the documentation for the configuration options
<<config_compression,*compression*>> and
<<config_compression_level,*compression_level*>> for more information.
Notes:
-* The “disk blocks” size is the cache size when taking disk block size into
- account. This value should match the “cache size” value from “ccache
- --show-stats”. The other size numbers refer to actual content sizes.
-* “Compressed data” refers to result and manifest files stored in the cache.
-* “Incompressible data” refers to files that are always stored uncompressed
+* The ``disk blocks'' size is the cache size when taking disk block size into
+ account. This value should match the ``cache size'' value from ``ccache
+ --show-stats''. The other size numbers refer to actual content sizes.
+* ``Compressed data'' refers to result and manifest files stored in the cache.
+* ``Incompressible data'' refers to files that are always stored uncompressed
(triggered by enabling <<config_file_clone,*file_clone*>> or
<<config_hard_link,*hard_link*>>) or unknown files (for instance files
created by older ccache versions).
Current size of the cache.
| called for link |
-The compiler was called for linking, not compiling.
+The compiler was called for linking, not compiling. Ccache only supports
+compilation of a single file, i.e. calling the compiler with the *-c* option to
+produce a single object file from a single source file.
| called for preprocessing |
The compiler was called for preprocessing, not compiling.
If no previous result is detected (i.e., there is a cache miss) using the
direct mode, ccache will fall back to the preprocessor mode unless the *depend
mode* is enabled. In the depend mode, ccache never runs the preprocessor, not
-even on cache misses. Read more in <<_the_depend_mode,THE DEPEND MODE>>
+even on cache misses. Read more in _<<_the_depend_mode,The depend mode>>_
below.
* The compiler is not generating dependencies using *-MD* or *-MMD*.
+Handling of newly created header files
+--------------------------------------
+
+If modification time (mtime) or status change time (ctime) of one of the include
+files is the same second as the time compilation is being done, ccache disables
+the direct mode (or, in the case of a <<_precompiled_headers,precompiled
+header>>, disables caching completely). This is done as a safety measure to
+avoid a race condition (see below).
+
+To be able to use a newly created header file in direct mode (or use a newly
+precompiled header), either:
+
+* create the include file earlier in the build process, or
+* set <<config_sloppiness,*sloppiness*>> to
+ *include_file_ctime,include_file_mtime* if you are willing to take the risk,
+ for instance if you know that your build system is robust enough not to
+ trigger the race condition.
+
+For reference, the race condition mentioned above consists of these events:
+
+1. The preprocessor is run.
+2. An include file is modified by someone.
+3. The new include file is hashed by ccache.
+4. The real compiler is run on the preprocessor's output, which contains data
+ from the old header file.
+5. The wrong object file is stored in the cache.
+
+
Cache debugging
---------------
|==============================================================================
+If <<config_debug_dir,*debug_dir*>> (environment variable *CCACHE_DEBUGDIR*) is
+set, the files above will be written to that directory with full absolute paths
+instead of next to the object file.
+
In the direct mode, ccache uses the 160 bit BLAKE3 hash of the
*ccache-input-c* + *ccache-input-d* data (where *+* means concatenation), while
the *ccache-input-c* + *ccache-input-p* data is used in the preprocessor mode.
The *ccache-input-text* file is a combined text version of the three
-binary input files. It has three sections (“COMMON”, “DIRECT MODE” and
-“PREPROCESSOR MODE”), which is turn contain annotations that say what kind of
+binary input files. It has three sections (``COMMON'', ``DIRECT MODE'' and
+``PREPROCESSOR MODE''), which in turn contain annotations that say what kind of
data comes next.
-To debug why you don’t get an expected cache hit for an object file, you can do
+To debug why you don't get an expected cache hit for an object file, you can do
something like this:
1. Build with debug mode enabled.
`__TIMESTAMP__` is used when using a precompiled header. Further, it can't
detect changes in **#define**s in the source code because of how
preprocessing works in combination with precompiled headers.
+* You may also want to include *include_file_mtime,include_file_ctime* in
+ <<config_sloppiness,*sloppiness*>>. See
+ _<<_handling_of_newly_created_header_files,Handling of newly created header
+ files>>_.
* You must either:
+
--
-------
* The direct mode fails to pick up new header files in some rare scenarios. See
- <<_the_direct_mode,THE DIRECT MODE>> above.
+ _<<_the_direct_mode,The direct mode>>_ above.
Troubleshooting
A general tip for getting information about what ccache is doing is to enable
debug logging by setting the configuration option <<config_debug,*debug*>> (or
-the environment variable *CCACHE_DEBUG*); see <<_cache_debugging,debugging>>
-for more information. Another way of keeping track of what is happening is to
-check the output of *ccache -s*.
+the environment variable *CCACHE_DEBUG*); see _<<_cache_debugging,Cache
+debugging>>_ for more information. Another way of keeping track of what is
+happening is to check the output of *ccache -s*.
Performance
*-Wp,-MMD,_path_*, and *-Wp,-D_define_*) is used.
** This was the first compilation with a new value of the
<<config_base_dir,base directory>>.
-** A modification time of one of the include files is too new (created the same
- second as the compilation is being done). This check is made to avoid a race
- condition. To fix this, create the include file earlier in the build
- process, if possible, or set <<config_sloppiness,*sloppiness*>> to
- *include_file_ctime, include_file_mtime* if you are willing to take the risk.
- (The race condition consists of these events: the preprocessor is run; an
- include file is modified by someone; the new include file is hashed by
- ccache; the real compiler is run on the preprocessor's output, which contains
- data from the old header file; the wrong object file is stored in the cache.)
+** A modification or status change time of one of the include files is too new
+ (created the same second as the compilation is being done). See
+ _<<_handling_of_newly_created_header_files,Handling of newly created header
+ files>>_.
** The `__TIME__` preprocessor macro is (potentially) being used. Ccache turns
off direct mode if `__TIME__` is present in the source code. This is done as
a safety measure since the string indicates that a `__TIME__` macro _may_
* If ``unsupported compiler option'' has been incremented, enable debug logging
and check which compiler option was rejected.
* If ``preprocessor error'' has been incremented, one possible reason is that
- precompiled headers are being used. See <<_precompiled_headers,PRECOMPILED
- HEADERS>> for how to remedy this.
+ precompiled headers are being used. See _<<_precompiled_headers,Precompiled
+ headers>>_ for how to remedy this.
* If ``can't use precompiled header'' has been incremented, see
- <<_precompiled_headers,PRECOMPILED HEADERS>>.
-* If ``can't use modules'' has been incremented, see
- <<_c_modules,C++ MODULES>>.
+ _<<_precompiled_headers,Precompiled headers>>_.
+* If ``can't use modules'' has been incremented, see _<<_c_modules,C++
+ modules>>_.
Corrupt object files
Ccache news
===========
+Ccache 4.2
+----------
+Release date: 2021-02-02
+
+New features
+~~~~~~~~~~~~
+
+- Improved calculation of relative paths when using `base_dir` to also consider
+ canonical paths (i.e. paths with dereferenced symlinks) as candidates.
+
+- Added a `debug_dir` (`CCACHE_DEBUGDIR`) configuration setting for specifying a
+ directory for files written in debug mode.
+
+- Added support for compiler option `-x cuda`, understood by Clang.
+
+- The value of the `SOURCE_DATE_EPOCH` variable is now only hashed if it
+ potentially affects the output from ccache. This means that ccache now (like
+ before version 4.0) will be able to produce cache hits for source code that
+ doesn't contain `__DATE__` or `__TIME__` macros regardless of the value of
+ `SOURCE_DATE_EPOCH`.
+
+
+Bug fixes
+~~~~~~~~~
+
+- Fixed a bug where a non-Clang compiler would silently accept the
+ Clang-specific `-f(no-)color-diagnostics` option when run via ccache. This
+ confused feature detection made by e.g. CMake.
+
+- Improved creation of temporary files on Windows. Previously, ccache would in
+ practice reuse temporary filenames on said platform resulting in various
+ problems with parallel builds.
+
+- Fixed creation of parent directories when creating a lock file on Windows.
+
+- Fixed a race condition related to removal of temporary files.
+
+- Improved calculation of directory name for a Windows-style path.
+
+- A compilation result is now not stored in the cache if an included
+ preprocessed header file is too new. This fixes a bug where the content of a
+ newly created preprocessed header file could be missing from the hash,
+ resulting in a false positive cache hit.
+
+- Fixed calculation of the split DWARF filename for an object filename with zero
+ or multiple dots.
+
+- Fixed retrieval of the object file when the destination is `/dev/null`.
+
+
+Portability and build improvements
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- Additional compiler flags like `-Wextra -Werror` are now only added when
+ building ccache in developer mode.
+
+- The developer build mode no longer enables `-Weverything` for Clang.
+
+- `_XOPEN_SOURCE` is now defined appropriately on FreeBSD to fix missing
+ declaration of `isascii`.
+
+- Improved detection of buildability of BLAKE3 assembler files.
+
+- Disabled build of inode cache code on OSes without
+ `pthread_mutexattr_setpshared`, such as OpenBSD.
+
+- Made static linking the default for a Windows MinGW build.
+
+- Removed legacy fallback replacements of `mkstemp` and `realpath`.
+
+- Improved detection of SSE/AVX support.
+
+- Improved detection of support for the AVX2 target attribute.
+
+- Configuration scripts now try to detect and enable BLAKE3's Neon support.
+
+- Made it possible to run the integration test suite on macOS.
+
+- Fixed building of 32-bit unit tests on macOS.
+
+- Made it possible to compile ccache for C++17.
+
+- Fixed printing of 64-bit `time_t` on 32-bit architectures like RISCV32.
+
+- Made sure to only use ASCII characters in the manual's AsciiDoc source code to
+ make it possible to generate documentation in non-UTF8 locales.
+
+- Upgraded to optional-lite 3.4.0, fmt 7.1.3, doctest 2.4.4 and zstd 1.4.8.
+
+- Took steps towards being able to run the test suite on Windows.
+
+
+Documentation
+~~~~~~~~~~~~~
+
+- Improved wording of `compiler_check` string values.
+
+- Improved documentation of compression levels and the `-X/--recompress` option.
+
+- Improved consistency of terms in the manual.
+
+- HTML documentation is now built and installed by default if possible.
+
+- Fixed incorrect documentation of configuration option `cache_dir`.
+
+- Added hint on how to link statically with libzstd.
+
+- Mentioned that ccache requires the `-c` compiler option.
+
+
Ccache 4.1
----------
Release date: 2020-11-22
echo "Error: $file not formatted with Clang-Format"
echo 'Run "make format" or apply this diff:'
git diff $cf_color --no-index "$file" "$tmp_file" \
- | sed -r -e "s!^---.*!--- a/$file!" \
+ | sed -E -e "s!^---.*!--- a/$file!" \
-e "s!^\+\+\+.*!+++ b/$file!" \
-e "/diff --/d" -e "/index /d" \
-e "s/.[0-9]*.clang-format.tmp//"
-readability-implicit-bool-conversion,
-readability-magic-numbers,
-readability-else-after-return,
+ -readability-named-parameter,
-readability-qualified-auto,
- -readability-magic-numbers,
+ -readability-redundant-declaration,
performance-*,
-performance-unnecessary-value-param,
modernize-*,
-modernize-avoid-c-arrays,
-modernize-pass-by-value,
+ -modernize-return-braced-init-list,
-modernize-use-auto,
+ -modernize-use-default-member-init,
-modernize-use-trailing-return-type,
cppcoreguidelines-*,
-cppcoreguidelines-pro-bounds-array-to-pointer-decay,
-cppcoreguidelines-pro-type-reinterpret-cast,
-cppcoreguidelines-pro-type-union-access,
-cppcoreguidelines-narrowing-conversions,
+ -cppcoreguidelines-non-private-member-variables-in-classes,
+ -cppcoreguidelines-special-member-functions,
bugprone-*,
-bugprone-signed-char-misuse,
-bugprone-branch-clone,
cert-*,
-cert-err34-c,
-cert-dcl50-cpp,
+ -cert-dcl58-cpp,
-cert-err58-cpp,
clang-diagnostic-*,
clang-analyzer-*,
endif()
add_library(ccache_lib STATIC ${source_files})
+target_compile_definitions(
+ ccache_lib PUBLIC -Dnssv_CONFIG_SELECT_STRING_VIEW=nssv_STRING_VIEW_NONSTD
+)
if(WIN32)
target_link_libraries(ccache_lib PRIVATE ws2_32 "psapi")
PRIVATE standard_settings standard_warnings ZSTD::ZSTD
Threads::Threads third_party_lib)
-target_include_directories(ccache_lib PRIVATE ${CMAKE_BINARY_DIR} .)
+target_include_directories(ccache_lib PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
add_subdirectory(third_party)
#include "system.hpp"
#include "Stat.hpp"
-#include "exceptions.hpp"
#include "third_party/nonstd/optional.hpp"
explicit CacheFile(const std::string& path);
- CacheFile(const CacheFile&) = delete;
- CacheFile& operator=(const CacheFile&) = delete;
-
const Stat& lstat() const;
const std::string& path() const;
Type type() const;
private:
- const std::string m_path;
+ std::string m_path;
mutable nonstd::optional<Stat> m_stat;
};
-// Copyright (C) 2019-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2019-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
#include "AtomicFile.hpp"
#include "Compression.hpp"
+#include "Sloppiness.hpp"
#include "Util.hpp"
#include "assertions.hpp"
-#include "ccache.hpp"
#include "exceptions.hpp"
#include "fmtmacros.hpp"
compression_level,
cpp_extension,
debug,
+ debug_dir,
depend_mode,
direct_mode,
disable,
{"compression_level", ConfigItem::compression_level},
{"cpp_extension", ConfigItem::cpp_extension},
{"debug", ConfigItem::debug},
+ {"debug_dir", ConfigItem::debug_dir},
{"depend_mode", ConfigItem::depend_mode},
{"direct_mode", ConfigItem::direct_mode},
{"disable", ConfigItem::disable},
{"COMPRESSLEVEL", "compression_level"},
{"CPP2", "run_second_cpp"},
{"DEBUG", "debug"},
+ {"DEBUGDIR", "debug_dir"},
{"DEPEND", "depend_mode"},
{"DIR", "cache_dir"},
{"DIRECT", "direct_mode"},
case ConfigItem::debug:
return format_bool(m_debug);
+ case ConfigItem::debug_dir:
+ return m_debug_dir;
+
case ConfigItem::depend_mode:
return format_bool(m_depend_mode);
m_debug = parse_bool(value, env_var_key, negate);
break;
+ case ConfigItem::debug_dir:
+ m_debug_dir = value;
+ break;
+
case ConfigItem::depend_mode:
m_depend_mode = parse_bool(value, env_var_key, negate);
break;
-// Copyright (C) 2019-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2019-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
int8_t compression_level() const;
const std::string& cpp_extension() const;
bool debug() const;
+ const std::string& debug_dir() const;
bool depend_mode() const;
bool direct_mode() const;
bool disable() const;
void set_cache_dir(const std::string& value);
void set_cpp_extension(const std::string& value);
void set_compiler(const std::string& value);
- void set_compiler_type(CompilerType compiler_type);
+ void set_compiler_type(CompilerType value);
void set_depend_mode(bool value);
void set_debug(bool value);
void set_direct_mode(bool value);
std::string m_secondary_config_path;
bool m_absolute_paths_in_stderr = false;
- std::string m_base_dir = "";
+ std::string m_base_dir;
std::string m_cache_dir;
- std::string m_compiler = "";
+ std::string m_compiler;
std::string m_compiler_check = "mtime";
CompilerType m_compiler_type = CompilerType::auto_guess;
bool m_compression = true;
int8_t m_compression_level = 0; // Use default level
- std::string m_cpp_extension = "";
+ std::string m_cpp_extension;
bool m_debug = false;
+ std::string m_debug_dir;
bool m_depend_mode = false;
bool m_direct_mode = true;
bool m_disable = false;
- std::string m_extra_files_to_hash = "";
+ std::string m_extra_files_to_hash;
bool m_file_clone = false;
bool m_hard_link = false;
bool m_hash_dir = true;
- std::string m_ignore_headers_in_manifest = "";
- std::string m_ignore_options = "";
+ std::string m_ignore_headers_in_manifest;
+ std::string m_ignore_options;
bool m_inode_cache = false;
bool m_keep_comments_cpp = false;
double m_limit_multiple = 0.8;
- std::string m_log_file = "";
+ std::string m_log_file;
uint64_t m_max_files = 0;
uint64_t m_max_size = 5ULL * 1000 * 1000 * 1000;
- std::string m_path = "";
+ std::string m_path;
bool m_pch_external_checksum = false;
- std::string m_prefix_command = "";
- std::string m_prefix_command_cpp = "";
+ std::string m_prefix_command;
+ std::string m_prefix_command_cpp;
bool m_read_only = false;
bool m_read_only_direct = false;
bool m_recache = false;
return m_debug;
}
+inline const std::string&
+Config::debug_dir() const
+{
+ return m_debug_dir;
+}
+
inline bool
Config::depend_mode() const
{
void
Context::unlink_pending_tmp_files_signal_safe()
{
- for (const std::string& path : m_pending_tmp_files) {
+ for (auto it = m_pending_tmp_files.rbegin(); it != m_pending_tmp_files.rend();
+ ++it) {
// Don't call Util::unlink_tmp since its log calls aren't signal safe.
- unlink(path.c_str());
+ unlink(it->c_str());
}
// Don't clear m_pending_tmp_files since this method must be signal safe.
}
{
SignalHandlerBlocker signal_handler_blocker;
- for (const std::string& path : m_pending_tmp_files) {
- Util::unlink_tmp(path, Util::UnlinkLog::ignore_failure);
+ for (auto it = m_pending_tmp_files.rbegin(); it != m_pending_tmp_files.rend();
+ ++it) {
+ Util::unlink_tmp(*it, Util::UnlinkLog::ignore_failure);
}
m_pending_tmp_files.clear();
}
#include "Args.hpp"
#include "ArgsInfo.hpp"
#include "Config.hpp"
+#include "Counters.hpp"
#include "Digest.hpp"
#include "File.hpp"
#include "MiniTrace.hpp"
#include "NonCopyable.hpp"
-#include "ccache.hpp"
+#include "Sloppiness.hpp"
#ifdef INODE_CACHE_SUPPORTED
# include "InodeCache.hpp"
#include "Counters.hpp"
-#include "Statistics.hpp"
+#include "Statistic.hpp"
#include "assertions.hpp"
#include <algorithm>
// this program; if not, write to the Free Software Foundation, Inc., 51
// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#pragma once
+
class Context;
class Hash;
-// Copyright (C) 2020 Joel Rosdahl and other contributors
+// Copyright (C) 2020-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
Hash::hash_fd(int fd)
{
return Util::read_fd(
- fd, [=](const void* data, size_t size) { hash(data, size); });
+ fd, [this](const void* data, size_t size) { hash(data, size); });
}
bool
-// Copyright (C) 2020 Joel Rosdahl and other contributors
+// Copyright (C) 2020-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
return true;
}
-InodeCache::Bucket*
-InodeCache::acquire_bucket(uint32_t index)
+bool
+InodeCache::with_bucket(const Digest& key_digest,
+ const BucketHandler& bucket_handler)
{
+ uint32_t hash;
+ Util::big_endian_to_int(key_digest.bytes(), hash);
+ const uint32_t index = hash % k_num_buckets;
Bucket* bucket = &m_sr->buckets[index];
int err = pthread_mutex_lock(&bucket->mt);
#ifdef HAVE_PTHREAD_MUTEX_ROBUST
LOG(
"Can't consolidate stale mutex at index {}: {}", index, strerror(err));
LOG_RAW("Consider removing the inode cache file if the problem persists");
- return nullptr;
+ return false;
}
LOG("Wiping bucket at index {} because of stale mutex", index);
memset(bucket->entries, 0, sizeof(Bucket::entries));
LOG("Failed to lock mutex at index {}: {}", index, strerror(err));
LOG_RAW("Consider removing the inode cache file if problem persists");
++m_sr->errors;
- return nullptr;
+ return false;
}
#ifdef HAVE_PTHREAD_MUTEX_ROBUST
}
#endif
- return bucket;
-}
-
-InodeCache::Bucket*
-InodeCache::acquire_bucket(const Digest& key_digest)
-{
- uint32_t hash;
- Util::big_endian_to_int(key_digest.bytes(), hash);
- return acquire_bucket(hash % k_num_buckets);
-}
-void
-InodeCache::release_bucket(Bucket* bucket)
-{
+ try {
+ bucket_handler(bucket);
+ } catch (...) {
+ pthread_mutex_unlock(&bucket->mt);
+ throw;
+ }
pthread_mutex_unlock(&bucket->mt);
+ return true;
}
bool
return false;
}
- Bucket* bucket = acquire_bucket(key_digest);
-
- if (!bucket) {
- return false;
- }
-
bool found = false;
-
- for (uint32_t i = 0; i < k_num_entries; ++i) {
- if (bucket->entries[i].key_digest == key_digest) {
- if (i > 0) {
- Entry tmp = bucket->entries[i];
- memmove(&bucket->entries[1], &bucket->entries[0], sizeof(Entry) * i);
- bucket->entries[0] = tmp;
- }
-
- file_digest = bucket->entries[0].file_digest;
- if (return_value) {
- *return_value = bucket->entries[0].return_value;
+ const bool success = with_bucket(key_digest, [&](Bucket* const bucket) {
+ for (uint32_t i = 0; i < k_num_entries; ++i) {
+ if (bucket->entries[i].key_digest == key_digest) {
+ if (i > 0) {
+ Entry tmp = bucket->entries[i];
+ memmove(&bucket->entries[1], &bucket->entries[0], sizeof(Entry) * i);
+ bucket->entries[0] = tmp;
+ }
+
+ file_digest = bucket->entries[0].file_digest;
+ if (return_value) {
+ *return_value = bucket->entries[0].return_value;
+ }
+ found = true;
+ break;
}
- found = true;
- break;
}
+ });
+ if (!success) {
+ return false;
}
- release_bucket(bucket);
LOG("inode cache {}: {}", found ? "hit" : "miss", path);
} else {
++m_sr->misses;
}
- LOG("accumulated stats for inode cache: hits={}, misses={}, errors={}",
+ LOG("Accumulated stats for inode cache: hits={}, misses={}, errors={}",
m_sr->hits.load(),
m_sr->misses.load(),
m_sr->errors.load());
return false;
}
- Bucket* bucket = acquire_bucket(key_digest);
+ const bool success = with_bucket(key_digest, [&](Bucket* const bucket) {
+ memmove(&bucket->entries[1],
+ &bucket->entries[0],
+ sizeof(Entry) * (k_num_entries - 1));
+
+ bucket->entries[0].key_digest = key_digest;
+ bucket->entries[0].file_digest = file_digest;
+ bucket->entries[0].return_value = return_value;
+ });
- if (!bucket) {
+ if (!success) {
return false;
}
- memmove(&bucket->entries[1],
- &bucket->entries[0],
- sizeof(Entry) * (k_num_entries - 1));
-
- bucket->entries[0].key_digest = key_digest;
- bucket->entries[0].file_digest = file_digest;
- bucket->entries[0].return_value = return_value;
-
- release_bucket(bucket);
-
LOG("inode cache insert: {}", path);
return true;
#include "config.h"
+#include <functional>
#include <string>
class Config;
struct Entry;
struct Key;
struct SharedRegion;
+ using BucketHandler = std::function<void(Bucket* bucket)>;
bool mmap_file(const std::string& inode_cache_file);
static bool
hash_inode(const std::string& path, ContentType type, Digest& digest);
- Bucket* acquire_bucket(uint32_t index);
- Bucket* acquire_bucket(const Digest& key_digest);
- static void release_bucket(Bucket* bucket);
+ bool with_bucket(const Digest& key_digest,
+ const BucketHandler& bucket_handler);
static bool create_new_file(const std::string& filename);
bool initialize();
error);
if (error == ERROR_PATH_NOT_FOUND) {
// Directory doesn't exist?
- if (Util::create_dir(Util::dir_name(lockfile)) == 0) {
+ if (Util::create_dir(Util::dir_name(lockfile))) {
// OK. Retry.
continue;
}
// Copyright (C) 2002 Andrew Tridgell
-// Copyright (C) 2009-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2009-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
if (tm) {
strftime(timestamp, sizeof(timestamp), "%Y-%m-%dT%H:%M:%S", &*tm);
} else {
- snprintf(timestamp, sizeof(timestamp), "%lu", tv.tv_sec);
+ snprintf(timestamp,
+ sizeof(timestamp),
+ "%llu",
+ static_cast<long long unsigned int>(tv.tv_sec));
}
snprintf(prefix,
sizeof(prefix),
-// Copyright (C) 2009-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2009-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
#include "File.hpp"
#include "Hash.hpp"
#include "Logging.hpp"
+#include "Sloppiness.hpp"
#include "StdMakeUnique.hpp"
-#include "ccache.hpp"
#include "fmtmacros.hpp"
#include "hashutil.hpp"
} catch (const Error& e) {
LOG("Error: {}", e.what());
}
+ } else {
+ LOG_RAW("The entry already exists in the manifest, not adding");
}
return false;
}
class NonCopyable
{
-protected:
- NonCopyable() = default;
-
-private:
+public:
NonCopyable(const NonCopyable&) = delete;
NonCopyable& operator=(const NonCopyable&) = delete;
+
+protected:
+ NonCopyable() = default;
};
#include "File.hpp"
#include "Logging.hpp"
#include "Stat.hpp"
-#include "Statistics.hpp"
+#include "Statistic.hpp"
#include "Util.hpp"
#include "exceptions.hpp"
#include "fmtmacros.hpp"
-// Copyright (C) 2020 Joel Rosdahl and other contributors
+// Copyright (C) 2020-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
uint64_t file_len,
nonstd::optional<std::string> raw_file)
{
- std::string dest_path;
+ LOG("Reading {} entry #{} {} ({} bytes)",
+ raw_file ? "raw" : "embedded",
+ entry_number,
+ Result::file_type_to_string(file_type),
+ file_len);
+ std::string dest_path;
m_dest_file_type = file_type;
switch (file_type) {
break;
case FileType::dependency:
+ // Dependency file: Open destination file but accumulate data in m_dest_data
+ // and write it in on_entry_end.
if (m_ctx.args_info.generating_dependencies) {
dest_path = m_ctx.args_info.output_dep;
m_dest_data.reserve(file_len);
break;
case FileType::stderr_output:
+ // Stderr data: Don't open a destination file. Instead accumulate it in
+ // m_dest_data and write it in on_entry_end.
m_dest_data.reserve(file_len);
- return;
+ break;
case FileType::coverage_unmangled:
if (m_ctx.args_info.generating_coverage) {
break;
}
- if (dest_path.empty()) {
- LOG_RAW("Not copying");
+ if (file_type == FileType::stderr_output) {
+ // Written in on_entry_end.
+ } else if (dest_path.empty()) {
+ LOG_RAW("Not writing");
} else if (dest_path == "/dev/null") {
- LOG_RAW("Not copying to /dev/null");
+ LOG_RAW("Not writing to /dev/null");
+ } else if (raw_file) {
+ Util::clone_hard_link_or_copy_file(m_ctx, *raw_file, dest_path, false);
+
+ // Update modification timestamp to save the file from LRU cleanup (and, if
+ // hard-linked, to make the object file newer than the source file).
+ Util::update_mtime(*raw_file);
} else {
- LOG("Retrieving {} file #{} {} ({} bytes)",
- raw_file ? "raw" : "embedded",
- entry_number,
- Result::file_type_to_string(file_type),
- file_len);
-
- if (raw_file) {
- Util::clone_hard_link_or_copy_file(m_ctx, *raw_file, dest_path, false);
-
- // Update modification timestamp to save the file from LRU cleanup (and,
- // if hard-linked, to make the object file newer than the source file).
- Util::update_mtime(*raw_file);
- } else {
- LOG("Copying to {}", dest_path);
- m_dest_fd = Fd(
- open(dest_path.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666));
- if (!m_dest_fd) {
- throw Error(
- "Failed to open {} for writing: {}", dest_path, strerror(errno));
- }
- m_dest_path = dest_path;
+ LOG("Writing to {}", dest_path);
+ m_dest_fd = Fd(
+ open(dest_path.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666));
+ if (!m_dest_fd) {
+ throw Error(
+ "Failed to open {} for writing: {}", dest_path, strerror(errno));
}
+ m_dest_path = dest_path;
}
}
void
ResultRetriever::on_entry_data(const uint8_t* data, size_t size)
{
- ASSERT((m_dest_file_type == FileType::stderr_output && !m_dest_fd)
- || (m_dest_file_type != FileType::stderr_output && m_dest_fd));
+ ASSERT(!(m_dest_file_type == FileType::stderr_output && m_dest_fd));
if (m_dest_file_type == FileType::stderr_output
|| (m_dest_file_type == FileType::dependency && !m_dest_path.empty())) {
m_dest_data.append(reinterpret_cast<const char*>(data), size);
- } else {
+ } else if (m_dest_fd) {
try {
Util::write_fd(*m_dest_fd, data, size);
} catch (Error& e) {
ResultRetriever::on_entry_end()
{
if (m_dest_file_type == FileType::stderr_output) {
+ LOG("Writing to file descriptor {}", STDERR_FILENO);
Util::send_to_stderr(m_ctx, m_dest_data);
} else if (m_dest_file_type == FileType::dependency && !m_dest_path.empty()) {
write_dependency_file();
-// Copyright (C) 2020 Joel Rosdahl and other contributors
+// Copyright (C) 2020-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
#include "SignalHandler.hpp"
-#include "assertions.hpp"
-
#ifndef _WIN32
# include "Context.hpp"
+# include "assertions.hpp"
+
+# include <signal.h> // NOLINT: sigaddset et al are defined in signal.h
namespace {
-// Copyright (C) 2020 Joel Rosdahl and other contributors
+// Copyright (C) 2020-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
#include "system.hpp"
-#include "signal.h"
-
class Context;
class SignalHandler
--- /dev/null
+// Copyright (C) 2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#pragma once
+
+// Flags selecting which checks ccache is allowed to be "sloppy" about. Every
+// enumerator occupies its own bit, so several flags can be combined into one
+// bitmask and tested with `&`.
+enum Sloppiness {
+ // Skip the check that an include file's mtime is older than the time of
+ // compilation.
+ SLOPPY_INCLUDE_FILE_MTIME = 1 << 0,
+ // Skip the check that an include file's ctime is older than the time of
+ // compilation.
+ SLOPPY_INCLUDE_FILE_CTIME = 1 << 1,
+ SLOPPY_TIME_MACROS = 1 << 2,
+ SLOPPY_PCH_DEFINES = 1 << 3,
+ // Allow us to match files based on their stats (size, mtime, ctime), without
+ // looking at their contents.
+ SLOPPY_FILE_STAT_MATCHES = 1 << 4,
+ // Allow us to not include any system headers in the manifest include files,
+ // similar to -MM versus -M for dependencies.
+ SLOPPY_SYSTEM_HEADERS = 1 << 5,
+ // Allow us to ignore ctimes when comparing file stats, so we can fake mtimes
+ // if we want to (it is much harder to fake ctimes since that requires
+ // changing the clock).
+ SLOPPY_FILE_STAT_MATCHES_CTIME = 1 << 6,
+ // Allow us to not include the -index-store-path option in the manifest hash.
+ SLOPPY_CLANG_INDEX_STORE = 1 << 7,
+ // Ignore locale settings.
+ SLOPPY_LOCALE = 1 << 8,
+ // Allow caching even if -fmodules is used.
+ SLOPPY_MODULES = 1 << 9,
+};
--- /dev/null
+// Copyright (C) 2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#pragma once
+
+// Statistics fields in storage order.
+//
+// Every field is given an explicit value. Since the values reflect the storage
+// order, the value of an existing field must not be changed.
+// NOTE(review): presumably a new field is to be added with the next free value
+// right before END -- confirm against the code that reads/writes the counters.
+enum class Statistic {
+ // Not a real counter; used where there is no counter to increment.
+ none = 0,
+ compiler_produced_stdout = 1,
+ compile_failed = 2,
+ internal_error = 3,
+ cache_miss = 4,
+ preprocessor_error = 5,
+ could_not_find_compiler = 6,
+ missing_cache_file = 7,
+ preprocessed_cache_hit = 8,
+ bad_compiler_arguments = 9,
+ called_for_link = 10,
+ files_in_cache = 11,
+ cache_size_kibibyte = 12,
+ obsolete_max_files = 13,
+ obsolete_max_size = 14,
+ unsupported_source_language = 15,
+ bad_output_file = 16,
+ no_input_file = 17,
+ multiple_source_files = 18,
+ autoconf_test = 19,
+ unsupported_compiler_option = 20,
+ output_to_stdout = 21,
+ direct_cache_hit = 22,
+ compiler_produced_no_output = 23,
+ compiler_produced_empty_output = 24,
+ error_hashing_extra_file = 25,
+ compiler_check_failed = 26,
+ could_not_use_precompiled_header = 27,
+ called_for_preprocessing = 28,
+ cleanups_performed = 29,
+ unsupported_code_directive = 30,
+ stats_zeroed_timestamp = 31,
+ could_not_use_modules = 32,
+
+ // Not a field. Being last and without an explicit value, END is always one
+ // greater than the highest field value (currently 33).
+ END
+};
#include "system.hpp"
#include "Counters.hpp"
+#include "Statistic.hpp" // Any reasonable use of Statistics requires the Statistic enum.
#include "third_party/nonstd/optional.hpp"
class Config;
-// Statistics fields in storage order.
-enum class Statistic {
- none = 0,
- compiler_produced_stdout = 1,
- compile_failed = 2,
- internal_error = 3,
- cache_miss = 4,
- preprocessor_error = 5,
- could_not_find_compiler = 6,
- missing_cache_file = 7,
- preprocessed_cache_hit = 8,
- bad_compiler_arguments = 9,
- called_for_link = 10,
- files_in_cache = 11,
- cache_size_kibibyte = 12,
- obsolete_max_files = 13,
- obsolete_max_size = 14,
- unsupported_source_language = 15,
- bad_output_file = 16,
- no_input_file = 17,
- multiple_source_files = 18,
- autoconf_test = 19,
- unsupported_compiler_option = 20,
- output_to_stdout = 21,
- direct_cache_hit = 22,
- compiler_produced_no_output = 23,
- compiler_produced_empty_output = 24,
- error_hashing_extra_file = 25,
- compiler_check_failed = 26,
- could_not_use_precompiled_header = 27,
- called_for_preprocessing = 28,
- cleanups_performed = 29,
- unsupported_code_directive = 30,
- stats_zeroed_timestamp = 31,
- could_not_use_modules = 32,
-
- END
-};
-
namespace Statistics {
// Read counters from `path`. No lock is acquired.
-// Copyright (C) 2020 Joel Rosdahl and other contributors
+// Copyright (C) 2020-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
#include "Util.hpp"
+#ifdef _WIN32
+# include "third_party/win32/mktemp.h"
+#endif
+
using nonstd::string_view;
namespace {
}
#endif
-#ifndef HAVE_MKSTEMP
-// Cheap and nasty mkstemp replacement.
-int
-mkstemp(char* name_template)
-{
-# ifdef __GNUC__
-# pragma GCC diagnostic push
-# pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-# endif
- mktemp(name_template);
-# ifdef __GNUC__
-# pragma GCC diagnostic pop
-# endif
- return open(name_template, O_RDWR | O_CREAT | O_EXCL | O_BINARY, 0600);
-}
-#endif
-
} // namespace
TemporaryFile::TemporaryFile(string_view path_prefix)
: path(std::string(path_prefix) + ".XXXXXX")
{
Util::ensure_dir_exists(Util::dir_name(path));
+#ifdef _WIN32
+ // MSVC lacks mkstemp() and Mingw-w64's implementation[1] is problematic, as
+ // it can reuse the names of recently-deleted files unless the caller
+ // remembers to call srand().
+
+ // [1]: <https://github.com/Alexpux/mingw-w64/blob/
+ // d0d7f784833bbb0b2d279310ddc6afb52fe47a46/mingw-w64-crt/misc/mkstemp.c>
+ fd = Fd(bsd_mkstemp(&path[0]));
+#else
fd = Fd(mkstemp(&path[0]));
+#endif
if (!fd) {
throw Fatal(
"Failed to create temporary file for {}: {}", path, strerror(errno));
-// Copyright (C) 2019-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2019-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
#endif
}
if (ctx.config.hard_link()) {
- unlink(dest.c_str());
LOG("Hard linking {} to {}", source, dest);
- int ret = link(source.c_str(), dest.c_str());
- if (ret == 0) {
+ try {
+ Util::hard_link(source, dest);
if (chmod(dest.c_str(), 0444) != 0) {
LOG("Failed to chmod: {}", strerror(errno));
}
return;
+ } catch (const Error& e) {
+ LOG_RAW(e.what());
+ // Fall back to copying.
}
- LOG("Failed to hard link: {}", strerror(errno));
}
LOG("Copying {} to {}", source, dest);
#endif
size_t n = path.find_last_of(delim);
if (n == std::string::npos) {
+ // "foo" -> "."
return ".";
+ } else if (n == 0) {
+ // "/" -> "/" (Windows: or "\\" -> "\\")
+ return path.substr(0, 1);
+#ifdef _WIN32
+ } else if (n == 2 && path[1] == ':') {
+ // Windows: "C:\\foo" -> "C:\\" or "C:/foo" -> "C:/"
+ return path.substr(0, 3);
+#endif
} else {
- return n == 0 ? "/" : path.substr(0, n);
+ // "/dir/foo" -> "/dir" (Windows: or "C:\\dir\\foo" -> "C:\\dir")
+ return path.substr(0, n);
}
}
}
}
-void
+std::vector<CacheFile>
get_level_1_files(const std::string& dir,
- const ProgressReceiver& progress_receiver,
- std::vector<std::shared_ptr<CacheFile>>& files)
+ const ProgressReceiver& progress_receiver)
{
+ std::vector<CacheFile> files;
+
if (!Stat::stat(dir)) {
- return;
+ return files;
}
size_t level_2_directories = 0;
}
if (!is_dir) {
- files.push_back(std::make_shared<CacheFile>(path));
+ files.emplace_back(path);
} else if (path != dir
&& path.find('/', dir.size() + 1) == std::string::npos) {
++level_2_directories;
});
progress_receiver(1.0);
+ return files;
}
std::string
return path;
}
+// Make `newpath` a hard link to `oldpath`, first removing whatever is found at
+// `newpath`. Throws `Error`, with the system's error description included in
+// the message, if the link can't be created.
+void
+hard_link(const std::string& oldpath, const std::string& newpath)
+{
+  // A stale `newpath` may be left behind by an earlier run, so get rid of it
+  // first. Nobody else is expected to create the entry at this point, which is
+  // why a race between unlink() and the link creation below is not handled.
+  // The result of unlink() is deliberately ignored.
+  unlink(newpath.c_str());
+
+#ifdef _WIN32
+  // Note the argument order: CreateHardLink takes the new name first.
+  const bool linked =
+    CreateHardLink(newpath.c_str(), oldpath.c_str(), nullptr) != 0;
+  if (linked) {
+    return;
+  }
+  const DWORD error = GetLastError();
+  throw Error("failed to link {} to {}: {}",
+              oldpath,
+              newpath,
+              Win32Util::error_message(error));
+#else
+  const int rc = link(oldpath.c_str(), newpath.c_str());
+  if (rc == 0) {
+    return;
+  }
+  throw Error(
+    "failed to link {} to {}: {}", oldpath, newpath, strerror(errno));
+#endif
+}
+
bool
is_absolute_path(string_view path)
{
}
std::string
-make_relative_path(const Context& ctx, string_view path)
+make_relative_path(const std::string& base_dir,
+ const std::string& actual_cwd,
+ const std::string& apparent_cwd,
+ nonstd::string_view path)
{
- if (ctx.config.base_dir().empty()
- || !Util::starts_with(path, ctx.config.base_dir())) {
+ if (base_dir.empty() || !Util::starts_with(path, base_dir)) {
return std::string(path);
}
// The algorithm for computing relative paths below only works for existing
// paths. If the path doesn't exist, find the first ancestor directory that
// does exist and assemble the path again afterwards.
- string_view original_path = path;
- std::string path_suffix;
+
+ std::vector<std::string> relpath_candidates;
+ const auto original_path = path;
Stat path_stat;
while (!(path_stat = Stat::stat(std::string(path)))) {
path = Util::dir_name(path);
}
- path_suffix = std::string(original_path.substr(path.length()));
+ const auto path_suffix = std::string(original_path.substr(path.length()));
+ const auto real_path = Util::real_path(std::string(path));
- std::string path_str(path);
- std::string normalized_path = Util::normalize_absolute_path(path_str);
- std::vector<std::string> relpath_candidates = {
- Util::get_relative_path(ctx.actual_cwd, normalized_path),
- };
- if (ctx.apparent_cwd != ctx.actual_cwd) {
- relpath_candidates.emplace_back(
- Util::get_relative_path(ctx.apparent_cwd, normalized_path));
- // Move best (= shortest) match first:
- if (relpath_candidates[0].length() > relpath_candidates[1].length()) {
- std::swap(relpath_candidates[0], relpath_candidates[1]);
+ const auto add_relpath_candidates = [&](nonstd::string_view path) {
+ const std::string normalized_path = Util::normalize_absolute_path(path);
+ relpath_candidates.push_back(
+ Util::get_relative_path(actual_cwd, normalized_path));
+ if (apparent_cwd != actual_cwd) {
+ relpath_candidates.emplace_back(
+ Util::get_relative_path(apparent_cwd, normalized_path));
}
+ };
+ add_relpath_candidates(path);
+ if (real_path != path) {
+ add_relpath_candidates(real_path);
}
+ // Find best (i.e. shortest existing) match:
+ std::sort(relpath_candidates.begin(),
+ relpath_candidates.end(),
+ [](const std::string& path1, const std::string& path2) {
+ return path1.length() < path2.length();
+ });
for (const auto& relpath : relpath_candidates) {
if (Stat::stat(relpath).same_inode_as(path_stat)) {
return relpath + path_suffix;
return std::string(original_path);
}
+// Convenience overload: reads the base directory from `ctx.config` and the
+// actual/apparent working directories from `ctx`, then forwards to the
+// four-argument make_relative_path().
+std::string
+make_relative_path(const Context& ctx, string_view path)
+{
+  const auto& base_dir = ctx.config.base_dir();
+  const auto& actual_cwd = ctx.actual_cwd;
+  const auto& apparent_cwd = ctx.apparent_cwd;
+  return make_relative_path(base_dir, actual_cwd, apparent_cwd, path);
+}
+
bool
matches_dir_prefix_or_file(string_view dir_prefix_or_file, string_view path)
{
resolved = buffer;
}
#else
- // Yes, there are such systems. This replacement relies on the fact that when
- // we call x_realpath we only care about symlinks.
- {
- ssize_t len = readlink(path.c_str(), buffer, buffer_size - 1);
- if (len != -1) {
- buffer[len] = 0;
- resolved = buffer;
- }
- }
+# error No realpath function available
#endif
return resolved ? resolved : (return_empty_on_error ? "" : path);
// Parameters:
// - dir: The directory to traverse recursively.
// - progress_receiver: Function that will be called for progress updates.
-// - files: Found files.
-void get_level_1_files(const std::string& dir,
- const ProgressReceiver& progress_receiver,
- std::vector<std::shared_ptr<CacheFile>>& files);
+std::vector<CacheFile>
+get_level_1_files(const std::string& dir,
+ const ProgressReceiver& progress_receiver);
// Return the current user's home directory, or throw `Fatal` if it can't
// be determined.
uint8_t level,
nonstd::string_view name);
+// Hard-link `oldpath` to `newpath`. Throws `Error` on error.
+void hard_link(const std::string& oldpath, const std::string& newpath);
+
// Write bytes in big endian order from an integer value.
//
// Parameters:
// time of day is used.
nonstd::optional<tm> localtime(nonstd::optional<time_t> time = {});
-// Make a relative path from current working directory to `path` if `path` is
-// under the base directory.
+// Make a relative path from current working directory (either `actual_cwd` or
+// `apparent_cwd`) to `path` if `path` is under `base_dir`.
+std::string make_relative_path(const std::string& base_dir,
+ const std::string& actual_cwd,
+ const std::string& apparent_cwd,
+ nonstd::string_view path);
+
+// Like above but with base directory and apparent/actual CWD taken from `ctx`.
std::string make_relative_path(const Context& ctx, nonstd::string_view path);
// Return whether `path` is equal to `dir_prefix_or_file` or if
-// Copyright (C) 2020 Joel Rosdahl and other contributors
+// Copyright (C) 2020-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
bool found_rewrite_includes = false;
std::string explicit_language; // As specified with -x.
- std::string file_language; // As deduced from file extension.
std::string input_charset_option; // -finput-charset=...
// Is the dependency makefile name overridden with -MF?
return nullopt;
}
+ if (config.compiler_type() != CompilerType::clang
+ && (args[i] == "-fcolor-diagnostics"
+ || args[i] == "-fno-color-diagnostics")) {
+ // Special case: If a non-Clang compiler gets -f(no-)color-diagnostics we'll
+ // bail out and just execute the compiler. The reason is that we don't
+ // include -f(no-)color-diagnostics in the hash so there can be a false
+ // cache hit in the following scenario:
+ //
+ // 1. ccache gcc -c example.c # adds a cache entry
+ // 2. ccache gcc -c example.c -fcolor-diagnostics # unexpectedly succeeds
+ return Statistic::unsupported_compiler_option;
+ }
+
if (args[i] == "-fcolor-diagnostics" || args[i] == "-fdiagnostics-color"
|| args[i] == "-fdiagnostics-color=always") {
state.color_diagnostics = ColorDiagnostics::always;
}
if (!args_info.input_file.empty()) {
- if (!language_for_file(args[i]).empty()) {
+ if (supported_source_extension(args[i])) {
LOG("Multiple input files: {} and {}", args_info.input_file, args[i]);
return Statistic::multiple_source_files;
} else if (!state.found_c_opt && !state.found_dc_opt) {
if (!state.explicit_language.empty() && state.explicit_language == "none") {
state.explicit_language.clear();
}
- state.file_language = language_for_file(args_info.input_file);
if (!state.explicit_language.empty()) {
if (!language_is_supported(state.explicit_language)) {
LOG("Unsupported language: {}", state.explicit_language);
}
args_info.actual_language = state.explicit_language;
} else {
- args_info.actual_language = state.file_language;
+ args_info.actual_language =
+ language_for_file(args_info.input_file, config.compiler_type());
}
args_info.output_is_precompiled_header =
return Statistic::unsupported_source_language;
}
- if (!config.run_second_cpp() && args_info.actual_language == "cu") {
- LOG_RAW("Using CUDA compiler; not compiling preprocessed code");
+ if (!config.run_second_cpp()
+ && (args_info.actual_language == "cu"
+ || args_info.actual_language == "cuda")) {
+ LOG("Source language is \"{}\"; not compiling preprocessed code",
+ args_info.actual_language);
config.set_run_second_cpp(true);
}
}
if (args_info.seen_split_dwarf) {
- size_t pos = args_info.output_obj.rfind('.');
- if (pos == std::string::npos || pos == args_info.output_obj.size() - 1) {
- LOG_RAW("Badly formed object filename");
- return Statistic::bad_compiler_arguments;
- }
-
args_info.output_dwo = Util::change_extension(args_info.output_obj, ".dwo");
}
#pragma once
#include "Args.hpp"
-#include "Statistics.hpp"
+#include "Statistic.hpp"
#include "third_party/nonstd/optional.hpp"
struct ProcessArgsResult
{
- ProcessArgsResult(Statistic error);
- ProcessArgsResult(const Args& preprocessor_args,
- const Args& extra_args_to_hash,
- const Args& compiler_args);
+ ProcessArgsResult(Statistic error_);
+ ProcessArgsResult(const Args& preprocessor_args_,
+ const Args& extra_args_to_hash_,
+ const Args& compiler_args_);
// nullopt on success, otherwise the statistics counter that should be
// incremented.
// Copyright (C) 2002-2007 Andrew Tridgell
-// Copyright (C) 2009-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2009-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
#include "ResultExtractor.hpp"
#include "ResultRetriever.hpp"
#include "SignalHandler.hpp"
+#include "Statistics.hpp"
#include "StdMakeUnique.hpp"
#include "TemporaryFile.hpp"
#include "UmaskScope.hpp"
R"({} version {}
Copyright (C) 2002-2007 Andrew Tridgell
-Copyright (C) 2009-2020 Joel Rosdahl and other contributors
+Copyright (C) 2009-2021 Joel Rosdahl and other contributors
See <https://ccache.dev/credits.html> for a complete list of contributors.
-M, --max-size SIZE set maximum size of cache to SIZE (use 0 for no
limit); available suffixes: k, M, G, T (decimal)
and Ki, Mi, Gi, Ti (binary); default suffix: G
- -X, --recompress LEVEL recompress the cache to LEVEL (integer level or
- "uncompressed")
+ -X, --recompress LEVEL recompress the cache to level LEVEL (integer or
+ "uncompressed") using the Zstandard algorithm;
+ see "Cache compression" in the manual for details
-o, --set-config KEY=VAL set configuration item KEY to value VAL
-x, --show-compression show compression statistics
-p, --show-config show current configuration options in
--print-stats print statistics counter IDs and corresponding
values in machine-parsable format
-See also <https://ccache.dev>.
+See also the manual on <https://ccache.dev/documentation.html>.
)";
// How often (in seconds) to scan $CCACHE_DIR/tmp for left-over temporary
// stored in the cache changes in a backwards-incompatible way.
const char HASH_PREFIX[] = "3";
+namespace {
+
+// Exception to throw when ccache did not succeed in getting or putting a
+// result in the cache.
+//
+// - `exit_code`: If set, just exit with that code directly; otherwise execute
+//   the real compiler and exit with its exit code.
+// - `statistic`: Statistics counter to update, unless it is
+//   `Statistic::none`.
+class Failure : public std::exception
+{
+public:
+  Failure(Statistic statistic,
+          nonstd::optional<int> exit_code = nonstd::nullopt)
+    : m_statistic(statistic), m_exit_code(exit_code)
+  {
+  }
+
+  // The exit code given at construction, if any.
+  nonstd::optional<int>
+  exit_code() const
+  {
+    return m_exit_code;
+  }
+
+  // The statistics counter given at construction.
+  Statistic
+  statistic() const
+  {
+    return m_statistic;
+  }
+
+private:
+  Statistic m_statistic;
+  nonstd::optional<int> m_exit_code;
+};
+
+} // namespace
+
static void
add_prefix(const Context& ctx, Args& args, const std::string& prefix_command)
{
});
}
+// Compute the path of a debug file belonging to `output_obj` and try to make
+// sure that its directory exists.
+//
+// The result is "<prefix>.ccache-<suffix>" where <prefix> is `output_obj`
+// itself if `debug_dir` is empty, otherwise `debug_dir` immediately followed
+// by the real path of `output_obj`.
+static std::string
+prepare_debug_path(const std::string& debug_dir,
+                   const std::string& output_obj,
+                   string_view suffix)
+{
+  std::string prefix = output_obj;
+  if (!debug_dir.empty()) {
+    // NOTE(review): `output_obj` may not exist yet when this is called, and
+    // the two parts are joined without a separator -- confirm that
+    // Util::real_path() still yields an absolute path in that case.
+    prefix = debug_dir + Util::real_path(output_obj);
+  }
+
+  try {
+    Util::ensure_dir_exists(Util::dir_name(prefix));
+  } catch (Error&) {
+    // Nothing sensible can be done about the error here, so it is ignored. The
+    // caller takes care of logging when it fails to open the path for writing.
+  }
+
+  return FMT("{}.ccache-{}", prefix, suffix);
+}
+
static void
init_hash_debug(Context& ctx,
Hash& hash,
- string_view obj_path,
char type,
string_view section_name,
FILE* debug_text_file)
return;
}
- std::string path = FMT("{}.ccache-input-{}", obj_path, type);
+ const auto path = prepare_debug_path(
+ ctx.config.debug_dir(), ctx.args_info.output_obj, FMT("input-{}", type));
File debug_binary_file(path, "wb");
if (debug_binary_file) {
hash.enable_debug(section_name, debug_binary_file.get(), debug_text_file);
}
}
+// Return true (after logging the reason) if the mtime or ctime of the include
+// file `path`, as found in `path_stat`, is not older than the time of
+// compilation. Each of the two checks is skipped when the corresponding
+// SLOPPY_INCLUDE_FILE_* sloppiness flag is set.
+static bool
+include_file_too_new(const Context& ctx,
+                     const std::string& path,
+                     const Stat& path_stat)
+{
+  // Using >= instead of > is on purpose: there may be a race between starting
+  // the compilation and writing the include file. See also the notes under
+  // "Performance" in doc/MANUAL.adoc.
+  const bool check_mtime =
+    (ctx.config.sloppiness() & SLOPPY_INCLUDE_FILE_MTIME) == 0;
+  if (check_mtime && path_stat.mtime() >= ctx.time_of_compilation) {
+    LOG("Include file {} too new", path);
+    return true;
+  }
+
+  // The change time of the file is subject to the same >= reasoning.
+  const bool check_ctime =
+    (ctx.config.sloppiness() & SLOPPY_INCLUDE_FILE_CTIME) == 0;
+  if (check_ctime && path_stat.ctime() >= ctx.time_of_compilation) {
+    LOG("Include file {} ctime too new", path);
+    return true;
+  }
+
+  return false;
+}
+
+// Returns false if the include file was "too new" and therefore should disable
+// the direct mode (or, in the case of a precompiled header, fall back to just
+// running the real compiler), otherwise true.
static bool
do_remember_include_file(Context& ctx,
std::string path,
bool system,
Hash* depend_mode_hash)
{
- bool is_pch = false;
-
if (path.length() >= 2 && path[0] == '<' && path[path.length() - 1] == '>') {
// Typically <built-in> or <command-line>.
return true;
}
}
- // The comparison using >= is intentional, due to a possible race between
- // starting compilation and writing the include file. See also the notes
- // under "Performance" in doc/MANUAL.adoc.
- if (!(ctx.config.sloppiness() & SLOPPY_INCLUDE_FILE_MTIME)
- && st.mtime() >= ctx.time_of_compilation) {
- LOG("Include file {} too new", path);
- return false;
- }
+ const bool is_pch = Util::is_precompiled_header(path);
+ const bool too_new = include_file_too_new(ctx, path, st);
+
+ if (too_new) {
+ // Opt out of direct mode because of a race condition.
+ //
+ // The race condition consists of these events:
+ //
+ // - the preprocessor is run
+ // - an include file is modified by someone
+ // - the new include file is hashed by ccache
+ // - the real compiler is run on the preprocessor's output, which contains
+ // data from the old header file
+ // - the wrong object file is stored in the cache.
- // The same >= logic as above applies to the change time of the file.
- if (!(ctx.config.sloppiness() & SLOPPY_INCLUDE_FILE_CTIME)
- && st.ctime() >= ctx.time_of_compilation) {
- LOG("Include file {} ctime too new", path);
return false;
}
// Let's hash the include file content.
Hash fhash;
- is_pch = Util::is_precompiled_header(path);
if (is_pch) {
if (ctx.included_pch_file.empty()) {
LOG("Detected use of precompiled header: {}", path);
return true;
}
+enum class RememberIncludeFileResult { ok, cannot_use_pch };
+
// This function hashes an include file and stores the path and hash in
// ctx.included_files. If the include file is a PCH, cpp_hash is also updated.
-static void
+static RememberIncludeFileResult
remember_include_file(Context& ctx,
const std::string& path,
Hash& cpp_hash,
bool system,
Hash* depend_mode_hash)
{
- if (!do_remember_include_file(ctx, path, cpp_hash, system, depend_mode_hash)
- && ctx.config.direct_mode()) {
- LOG_RAW("Disabling direct mode");
- ctx.config.set_direct_mode(false);
+ if (!do_remember_include_file(
+ ctx, path, cpp_hash, system, depend_mode_hash)) {
+ if (Util::is_precompiled_header(path)) {
+ return RememberIncludeFileResult::cannot_use_pch;
+ } else if (ctx.config.direct_mode()) {
+ LOG_RAW("Disabling direct mode");
+ ctx.config.set_direct_mode(false);
+ }
}
+
+ return RememberIncludeFileResult::ok;
}
static void
// - Makes include file paths for which the base directory is a prefix relative
// when computing the hash sum.
// - Stores the paths and hashes of included files in ctx.included_files.
-static bool
+//
+// Returns Statistic::none on success, otherwise a statistics counter to be
+// incremented.
+static Statistic
process_preprocessed_file(Context& ctx,
Hash& hash,
const std::string& path,
try {
data = Util::read_file(path);
} catch (Error&) {
- return false;
+ return Statistic::internal_error;
}
// Bytes between p and q are pending to be hashed.
q++;
if (q >= end) {
LOG_RAW("Failed to parse included file path");
- return false;
+ return Statistic::internal_error;
}
// q points to the beginning of an include file path
hash.hash(p, q - p);
hash.hash(inc_path);
}
- remember_include_file(ctx, inc_path, hash, system, nullptr);
+ if (remember_include_file(ctx, inc_path, hash, system, nullptr)
+ == RememberIncludeFileResult::cannot_use_pch) {
+ return Statistic::could_not_use_precompiled_header;
+ }
p = q; // Everything of interest between p and q has been hashed now.
} else if (q[0] == '.' && q[1] == 'i' && q[2] == 'n' && q[3] == 'c'
&& q[4] == 'b' && q[5] == 'i' && q[6] == 'n') {
print_included_files(ctx, stdout);
}
- return true;
+ return Statistic::none;
}
// Extract the used includes from the dependency file. Note that we cannot
TemporaryFile tmp_stdout(
FMT("{}/tmp.cpp_stdout", ctx.config.temporary_dir()));
- stdout_path = tmp_stdout.path;
+ ctx.register_pending_tmp_file(tmp_stdout.path);
+
+ // stdout_path needs the proper cpp_extension for the compiler to do its
+ // thing correctly.
+ stdout_path = FMT("{}.{}", tmp_stdout.path, ctx.config.cpp_extension());
+ Util::hard_link(tmp_stdout.path, stdout_path);
ctx.register_pending_tmp_file(stdout_path);
TemporaryFile tmp_stderr(
}
hash.hash_delimiter("cpp");
- bool is_pump = ctx.config.compiler_type() == CompilerType::pump;
- if (!process_preprocessed_file(ctx, hash, stdout_path, is_pump)) {
- throw Failure(Statistic::internal_error);
+ const bool is_pump = ctx.config.compiler_type() == CompilerType::pump;
+ const Statistic error =
+ process_preprocessed_file(ctx, hash, stdout_path, is_pump);
+ if (error != Statistic::none) {
+ throw Failure(error);
}
hash.hash_delimiter("cppstderr");
if (ctx.args_info.direct_i_file) {
ctx.i_tmpfile = ctx.args_info.input_file;
} else {
- // i_tmpfile needs the proper cpp_extension for the compiler to do its
- // thing correctly
- ctx.i_tmpfile = FMT("{}.{}", stdout_path, ctx.config.cpp_extension());
- Util::rename(stdout_path, ctx.i_tmpfile);
- ctx.register_pending_tmp_file(ctx.i_tmpfile);
+ ctx.i_tmpfile = stdout_path;
}
if (!ctx.config.run_second_cpp()) {
"COMPILER_PATH",
"GCC_COMPARE_DEBUG",
"GCC_EXEC_PREFIX",
- "SOURCE_DATE_EPOCH",
+ // Note: SOURCE_DATE_EPOCH is handled in hash_source_code_string().
};
for (const char* name : always_hash_env_vars) {
const char* value = getenv(name);
// Dump log buffer last to not lose any logs.
if (ctx.config.debug() && !ctx.args_info.output_obj.empty()) {
- const auto path = FMT("{}.ccache-log", ctx.args_info.output_obj);
- Logging::dump_log(path);
+ Logging::dump_log(prepare_debug_path(
+ ctx.config.debug_dir(), ctx.args_info.output_obj, "log"));
}
}
MTR_META_THREAD_NAME(ctx.args_info.output_obj.c_str());
if (ctx.config.debug()) {
- std::string path = FMT("{}.ccache-input-text", ctx.args_info.output_obj);
+ const auto path = prepare_debug_path(
+ ctx.config.debug_dir(), ctx.args_info.output_obj, "input-text");
File debug_text_file(path, "w");
if (debug_text_file) {
ctx.hash_debug_files.push_back(std::move(debug_text_file));
: nullptr;
Hash common_hash;
- init_hash_debug(
- ctx, common_hash, ctx.args_info.output_obj, 'c', "COMMON", debug_text_file);
+ init_hash_debug(ctx, common_hash, 'c', "COMMON", debug_text_file);
MTR_BEGIN("hash", "common_hash");
hash_common_info(
// Try to find the hash using the manifest.
Hash direct_hash = common_hash;
- init_hash_debug(ctx,
- direct_hash,
- ctx.args_info.output_obj,
- 'd',
- "DIRECT MODE",
- debug_text_file);
+ init_hash_debug(ctx, direct_hash, 'd', "DIRECT MODE", debug_text_file);
Args args_to_hash = processed.preprocessor_args;
args_to_hash.push_back(processed.extra_args_to_hash);
// Find the hash using the preprocessed output. Also updates
// ctx.included_files.
Hash cpp_hash = common_hash;
- init_hash_debug(ctx,
- cpp_hash,
- ctx.args_info.output_obj,
- 'p',
- "PREPROCESSOR MODE",
- debug_text_file);
+ init_hash_debug(ctx, cpp_hash, 'p', "PREPROCESSOR MODE", debug_text_file);
MTR_BEGIN("hash", "cpp_hash");
result_name = calculate_result_name(
extern const char CCACHE_VERSION[];
-const uint32_t SLOPPY_INCLUDE_FILE_MTIME = 1 << 0;
-const uint32_t SLOPPY_INCLUDE_FILE_CTIME = 1 << 1;
-const uint32_t SLOPPY_TIME_MACROS = 1 << 2;
-const uint32_t SLOPPY_PCH_DEFINES = 1 << 3;
-// Allow us to match files based on their stats (size, mtime, ctime), without
-// looking at their contents.
-const uint32_t SLOPPY_FILE_STAT_MATCHES = 1 << 4;
-// Allow us to not include any system headers in the manifest include files,
-// similar to -MM versus -M for dependencies.
-const uint32_t SLOPPY_SYSTEM_HEADERS = 1 << 5;
-// Allow us to ignore ctimes when comparing file stats, so we can fake mtimes
-// if we want to (it is much harder to fake ctimes, requires changing clock)
-const uint32_t SLOPPY_FILE_STAT_MATCHES_CTIME = 1 << 6;
-// Allow us to not include the -index-store-path option in the manifest hash.
-const uint32_t SLOPPY_CLANG_INDEX_STORE = 1 << 7;
-// Ignore locale settings.
-const uint32_t SLOPPY_LOCALE = 1 << 8;
-// Allow caching even if -fmodules is used.
-const uint32_t SLOPPY_MODULES = 1 << 9;
-
using FindExecutableFunction =
std::function<std::string(const Context& ctx,
const std::string& name,
#include "Config.hpp"
#include "Context.hpp"
#include "Logging.hpp"
+#include "Statistics.hpp"
#include "Util.hpp"
#ifdef INODE_CACHE_SUPPORTED
{
LOG("Cleaning up cache directory {}", subdir);
- std::vector<std::shared_ptr<CacheFile>> files;
- Util::get_level_1_files(
- subdir, [&](double progress) { progress_receiver(progress / 3); }, files);
+ std::vector<CacheFile> files = Util::get_level_1_files(
+ subdir, [&](double progress) { progress_receiver(progress / 3); });
uint64_t cache_size = 0;
uint64_t files_in_cache = 0;
++i, progress_receiver(1.0 / 3 + 1.0 * i / files.size() / 3)) {
const auto& file = files[i];
- if (!file->lstat().is_regular()) {
+ if (!file.lstat().is_regular()) {
// Not a file or missing file.
continue;
}
// Delete any tmp files older than 1 hour right away.
- if (file->lstat().mtime() + 3600 < current_time
- && Util::base_name(file->path()).find(".tmp.") != std::string::npos) {
- Util::unlink_tmp(file->path());
+ if (file.lstat().mtime() + 3600 < current_time
+ && Util::base_name(file.path()).find(".tmp.") != std::string::npos) {
+ Util::unlink_tmp(file.path());
continue;
}
- cache_size += file->lstat().size_on_disk();
+ cache_size += file.lstat().size_on_disk();
files_in_cache += 1;
}
// Sort according to modification time, oldest first.
- std::sort(files.begin(),
- files.end(),
- [](const std::shared_ptr<CacheFile>& f1,
- const std::shared_ptr<CacheFile>& f2) {
- return f1->lstat().mtime() < f2->lstat().mtime();
- });
+ std::sort(
+ files.begin(), files.end(), [](const CacheFile& f1, const CacheFile& f2) {
+ return f1.lstat().mtime() < f2.lstat().mtime();
+ });
LOG("Before cleanup: {:.0f} KiB, {:.0f} files",
static_cast<double>(cache_size) / 1024,
++i, progress_receiver(2.0 / 3 + 1.0 * i / files.size() / 3)) {
const auto& file = files[i];
- if (!file->lstat() || file->lstat().is_directory()) {
+ if (!file.lstat() || file.lstat().is_directory()) {
continue;
}
if ((max_size == 0 || cache_size <= max_size)
&& (max_files == 0 || files_in_cache <= max_files)
&& (max_age == 0
- || file->lstat().mtime()
+ || file.lstat().mtime()
> (current_time - static_cast<int64_t>(max_age)))) {
break;
}
- if (Util::ends_with(file->path(), ".stderr")) {
+ if (Util::ends_with(file.path(), ".stderr")) {
// In order to be nice to legacy ccache versions, make sure that the .o
// file is deleted before .stderr, because if the ccache process gets
// killed after deleting the .stderr but before deleting the .o, the
// cached result will be inconsistent. (.stderr is the only file that is
// optional for legacy ccache versions; any other file missing from the
// cache will be detected.)
- std::string o_file =
- file->path().substr(0, file->path().size() - 6) + "o";
+ std::string o_file = file.path().substr(0, file.path().size() - 6) + "o";
// Don't subtract this extra deletion from the cache size; that
// bookkeeping will be done when the loop reaches the .o file. If the
}
delete_file(
- file->path(), file->lstat().size_on_disk(), &cache_size, &files_in_cache);
+ file.path(), file.lstat().size_on_disk(), &cache_size, &files_in_cache);
cleaned = true;
}
{
LOG("Clearing out cache directory {}", subdir);
- std::vector<std::shared_ptr<CacheFile>> files;
- Util::get_level_1_files(
- subdir, [&](double progress) { progress_receiver(progress / 2); }, files);
+ const std::vector<CacheFile> files = Util::get_level_1_files(
+ subdir, [&](double progress) { progress_receiver(progress / 2); });
for (size_t i = 0; i < files.size(); ++i) {
- Util::unlink_safe(files[i]->path());
+ Util::unlink_safe(files[i].path());
progress_receiver(0.5 + 0.5 * i / files.size());
}
config.cache_dir(),
[&](const std::string& subdir,
const Util::ProgressReceiver& sub_progress_receiver) {
- std::vector<std::shared_ptr<CacheFile>> files;
- Util::get_level_1_files(
- subdir,
- [&](double progress) { sub_progress_receiver(progress / 2); },
- files);
+ const std::vector<CacheFile> files = Util::get_level_1_files(
+ subdir, [&](double progress) { sub_progress_receiver(progress / 2); });
for (size_t i = 0; i < files.size(); ++i) {
const auto& cache_file = files[i];
- on_disk_size += cache_file->lstat().size_on_disk();
+ on_disk_size += cache_file.lstat().size_on_disk();
try {
- auto file = open_file(cache_file->path(), "rb");
- auto reader = create_reader(*cache_file, file.get());
- compr_size += cache_file->lstat().size();
+ auto file = open_file(cache_file.path(), "rb");
+ auto reader = create_reader(cache_file, file.get());
+ compr_size += cache_file.lstat().size();
content_size += reader->content_size();
} catch (Error&) {
- incompr_size += cache_file->lstat().size();
+ incompr_size += cache_file.lstat().size();
}
sub_progress_receiver(1.0 / 2 + 1.0 * i / files.size() / 2);
ctx.config.cache_dir(),
[&](const std::string& subdir,
const Util::ProgressReceiver& sub_progress_receiver) {
- std::vector<std::shared_ptr<CacheFile>> files;
- Util::get_level_1_files(
- subdir,
- [&](double progress) { sub_progress_receiver(0.1 * progress); },
- files);
+ std::vector<CacheFile> files =
+ Util::get_level_1_files(subdir, [&](double progress) {
+ sub_progress_receiver(0.1 * progress);
+ });
auto stats_file = subdir + "/stats";
for (size_t i = 0; i < files.size(); ++i) {
const auto& file = files[i];
- if (file->type() != CacheFile::Type::unknown) {
+ if (file.type() != CacheFile::Type::unknown) {
thread_pool.enqueue([&statistics, stats_file, file, level] {
try {
- recompress_file(statistics, stats_file, *file, level);
+ recompress_file(statistics, stats_file, file, level);
} catch (Error&) {
// Ignore for now.
}
});
} else {
- statistics.update(0, 0, 0, file->lstat().size());
+ statistics.update(0, 0, 0, file.lstat().size());
}
sub_progress_receiver(0.1 + 0.9 * i / files.size());
#include "system.hpp"
#include "FormatNonstdStringView.hpp"
-#include "Statistics.hpp"
#include "third_party/fmt/core.h"
#include "third_party/nonstd/optional.hpp"
: ErrorBase(fmt::format(std::forward<T>(args)...))
{
}
-
-// Throw a Failure if ccache did not succeed in getting or putting a result in
-// the cache. If `exit_code` is set, just exit with that code directly,
-// otherwise execute the real compiler and exit with its exit code. Also updates
-// statistics counter `statistic` if it's not `Statistic::none`.
-class Failure : public std::exception
-{
-public:
- Failure(Statistic statistic,
- nonstd::optional<int> exit_code = nonstd::nullopt);
-
- nonstd::optional<int> exit_code() const;
- Statistic statistic() const;
-
-private:
- Statistic m_statistic;
- nonstd::optional<int> m_exit_code;
-};
-
-inline Failure::Failure(Statistic statistic, nonstd::optional<int> exit_code)
- : m_statistic(statistic), m_exit_code(exit_code)
-{
-}
-
-inline nonstd::optional<int>
-Failure::exit_code() const
-{
- return m_exit_code;
-}
-
-inline Statistic
-Failure::statistic() const
-{
- return m_statistic;
-}
-// Copyright (C) 2009-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2009-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
#include "Context.hpp"
#include "Hash.hpp"
#include "Logging.hpp"
+#include "Sloppiness.hpp"
#include "Stat.hpp"
-#include "ccache.hpp"
#include "execute.hpp"
#include "fmtmacros.hpp"
#include "macroskip.hpp"
hash.hash(now->tm_year);
hash.hash(now->tm_mon);
hash.hash(now->tm_mday);
+
+ // If the compiler has support for it, the expansion of __DATE__ will change
+ // according to the value of SOURCE_DATE_EPOCH. Note: We have to hash both
+ // SOURCE_DATE_EPOCH and the current date since we can't be sure that the
+ // compiler honors SOURCE_DATE_EPOCH.
+ const auto source_date_epoch = getenv("SOURCE_DATE_EPOCH");
+ if (source_date_epoch) {
+ hash.hash(source_date_epoch);
+ }
}
if (result & HASH_SOURCE_CODE_FOUND_TIME) {
// We don't know for sure that the program actually uses the __TIME__ macro,
// __TIME__ has been found so that the direct mode can be disabled.
LOG("Found __TIME__ in {}", path);
}
+
if (result & HASH_SOURCE_CODE_FOUND_TIMESTAMP) {
LOG("Found __TIMESTAMP__ in {}", path);
-// Copyright (C) 2010-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2010-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
{".HXX", "c++-header"},
{".tcc", "c++-header"},
{".TCC", "c++-header"},
- {".cu", "cu"},
+ {".cu", "cu"}, // Special case in language_for_file: "cuda" for Clang
{".hip", "hip"},
{nullptr, nullptr},
};
{"c++", "c++-cpp-output"},
{"c++-cpp-output", "c++-cpp-output"},
{"c++-header", "c++-cpp-output"},
- {"cu", "cpp-output"},
+ {"cu", "cpp-output"}, // NVCC
+ {"cuda", "cpp-output"}, // Clang
{"hip", "cpp-output"},
{"objective-c", "objective-c-cpp-output"},
{"objective-c-header", "objective-c-cpp-output"},
} // namespace
+// Tell whether the extension of `fname` is listed in k_ext_lang_table, i.e.
+// whether it is one of the source file extensions with a known language.
+bool
+supported_source_extension(const std::string& fname)
+{
+  const auto extension = Util::get_extension(fname);
+  // The table ends with a sentinel entry whose extension is null.
+  for (const auto* entry = &k_ext_lang_table[0]; entry->extension; ++entry) {
+    if (entry->extension == extension) {
+      return true;
+    }
+  }
+  return false;
+}
+
std::string
-language_for_file(const std::string& fname)
+language_for_file(const std::string& fname, CompilerType compiler_type)
{
auto ext = Util::get_extension(fname);
+ if (ext == ".cu" && compiler_type == CompilerType::clang) {
+ // Special case: Clang maps .cu to cuda.
+ return "cuda";
+ }
for (size_t i = 0; k_ext_lang_table[i].extension; ++i) {
if (k_ext_lang_table[i].extension == ext) {
return k_ext_lang_table[i].language;
-// Copyright (C) 2010-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2010-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
#include "system.hpp"
+#include "Config.hpp"
+
#include <string>
-// Guess the language of `fname` based on its extension. Returns the empty
-// string if the extension is unknown.
-std::string language_for_file(const std::string& fname);
+// Return whether a filename has a supported source code extension.
+bool supported_source_extension(const std::string& fname);
+
+// Guess the language of `fname` based on its extension and a compiler type.
+// Returns the empty string if the extension is unknown.
+std::string language_for_file(const std::string& fname,
+ CompilerType compiler_type);
// Return the preprocessed language for `language`, or the empty string if
// unknown.
# define NOMINMAX 1
# include <windows.h>
# define mkdir(a, b) _mkdir(a)
-# define link(src, dst) (CreateHardLink(dst, src, nullptr) ? 0 : -1)
# define execv(a, b) win32execute(a, b, 0, -1, -1)
# define strncasecmp _strnicmp
# define strcasecmp _stricmp
# define O_BINARY 0
#endif
-#ifdef HAVE_SYS_MMAN_H
+#if defined(HAVE_SYS_MMAN_H) && defined(HAVE_PTHREAD_MUTEXATTR_SETPSHARED)
# define INODE_CACHE_SUPPORTED
#endif
target_compile_definitions(third_party_lib PUBLIC -DSTATIC_GETOPT)
endif()
+if(WIN32)
+ target_sources(third_party_lib PRIVATE win32/mktemp.c)
+endif ()
+
if(ENABLE_TRACING)
target_sources(third_party_lib PRIVATE minitrace.c)
endif()
# Treat third party headers as system files (no warning for those headers).
target_include_directories(
third_party_lib
- PRIVATE ${CMAKE_BINARY_DIR} . SYSTEM)
+ # SYSTEM has to come right after the target name. Placed after PRIVATE it is
+ # parsed as one more include directory (a path literally named "SYSTEM") and
+ # the directories are not marked as system includes.
+ SYSTEM PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(third_party_lib PRIVATE standard_settings)
target_link_libraries(third_party_lib INTERFACE blake3)
target_link_libraries(blake3 PRIVATE standard_settings)
-if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SIZEOF_VOID_P EQUAL 8
- AND NOT (CMAKE_CXX_COMPILER_ID STREQUAL "Clang"
- AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0))
- set(blake_source_type asm)
- set(blake_suffix "_x86-64_unix.S")
-else()
- set(blake_source_type c)
- set(blake_suffix ".c")
+if(MSVC)
+ # No object file is created if masm is passed the compile options from standard_settings,
+ # so don't pass any flags at all to assembler (as no flags are needed anyway).
+ string(REPLACE "<FLAGS> " "" CMAKE_ASM_MASM_COMPILE_OBJECT "${CMAKE_ASM_MASM_COMPILE_OBJECT}")
endif()
-include(CheckAsmCompilerFlag)
-include(CheckCCompilerFlag)
-
-function(add_source_if_enabled feature compile_flags)
- string(TOUPPER "have_${blake_source_type}_${feature}" have_feature)
-
- # AVX512 support fails to compile with old Apple Clang versions even though
- # the compiler accepts the -m flags.
- if(${feature} STREQUAL "avx512"
- AND CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang"
- AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
- message(STATUS "Detected unsupported compiler for ${have_feature} - disabled")
- set(${have_feature} FALSE)
- elseif(${blake_source_type} STREQUAL "asm")
- check_asm_compiler_flag(${compile_flags} ${have_feature})
+include(CheckCSourceCompiles)
+
+# Add an implementation of the BLAKE3 SIMD `feature` to the blake3 target, or
+# define BLAKE3_NO_<FEATURE> on that target if no variant can be built.
+#
+# feature - feature name as used in the source file names (blake3_<feature>*)
+# msvc_flags - compiler flags enabling the feature when MSVC is used
+# others_flags - compiler flags enabling the feature for every other compiler
+# intrinsic - C expression inserted into the probe program for the C variant
+#
+# The assembler variant is probed first (64-bit targets only); the C variant
+# is the fallback. Probe results end up in HAVE_ASM_<FEATURE>/HAVE_C_<FEATURE>.
+function(add_source_if_enabled feature msvc_flags others_flags intrinsic)
+ if(MSVC)
+ set(compile_flags "${msvc_flags}")
else()
- check_c_compiler_flag(${compile_flags} ${have_feature})
+ set(compile_flags "${others_flags}")
+ endif()
+
+ # Select the assembler source suffix on every configure run. try_compile
+ # caches its result, so on a re-configure the probe below is skipped; if the
+ # suffix were only set inside that branch it would be empty here even though
+ # HAVE_ASM_<FEATURE> is TRUE, and target_sources would no longer name the
+ # assembler file.
+ if(MSVC)
+ set(suffix "_x86-64_windows_msvc.asm")
+ elseif(WIN32)
+ set(suffix "_x86-64_windows_gnu.S")
+ else()
+ set(suffix "_x86-64_unix.S")
+ endif()
+
+ # First check if it's possible to use the assembler variant for the feature.
+ string(TOUPPER "have_asm_${feature}" have_feature)
+ if(NOT DEFINED "${have_feature}" AND CMAKE_SIZEOF_VOID_P EQUAL 8)
+ if(NOT CMAKE_REQUIRED_QUIET)
+ message(STATUS "Performing Test ${have_feature}")
+ endif()
+
+ set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+
+ # Must set CMAKE_ASM_MASM_CREATE_STATIC_LIBRARY explicitly otherwise try_compile
+ # fails, see https://discourse.cmake.org/t/building-lib-file-from-asm-cmake-bug/1959
+ try_compile(
+ ${have_feature}
+ ${CMAKE_CURRENT_BINARY_DIR}
+ "${CMAKE_CURRENT_SOURCE_DIR}/blake3_${feature}${suffix}"
+ CMAKE_FLAGS -DCMAKE_ASM_MASM_CREATE_STATIC_LIBRARY=${CMAKE_C_CREATE_STATIC_LIBRARY}
+ COMPILE_DEFINITIONS ${compile_flags})
+
+ unset(CMAKE_TRY_COMPILE_TARGET_TYPE)
+
+ if(NOT CMAKE_REQUIRED_QUIET)
+ if (${${have_feature}})
+ message(STATUS "Performing Test ${have_feature} - Success")
+ else()
+ message(STATUS "Performing Test ${have_feature} - Failed")
+ endif()
+ endif()
+ endif()
+
+ # If the assembler variant didn't work, try the c variant.
+ if(NOT ${have_feature})
+ string(TOUPPER "have_c_${feature}" have_feature)
+ set(suffix ".c")
+
+ set(CMAKE_REQUIRED_FLAGS ${compile_flags})
+ # NOTE(review): ${intrinsic} sits inside a bracket argument, which CMake does
+ # not expand at this call site; this presumably relies on
+ # check_c_source_compiles re-evaluating its argument - confirm.
+ check_c_source_compiles(
+ [=[
+ #include <immintrin.h>
+ int main() { ${intrinsic}; return 0; }
+ ]=]
+ ${have_feature})
+ unset(CMAKE_REQUIRED_FLAGS)
endif()
if(${have_feature})
- target_sources(blake3 PRIVATE blake3_${feature}${blake_suffix})
- set_property(
- SOURCE blake3_${feature}${blake_suffix}
- APPEND PROPERTY COMPILE_FLAGS ${compile_flags})
+ target_sources(blake3 PRIVATE blake3_${feature}${suffix})
+ if(suffix STREQUAL ".c")
+ if(MINGW AND feature STREQUAL "avx512")
+ # Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65782.
+ # Taken from blake3's build.rs.
+ set(compile_flags "${compile_flags} -fno-asynchronous-unwind-tables")
+ endif()
+ set_property(
+ SOURCE blake3_${feature}${suffix}
+ APPEND PROPERTY COMPILE_FLAGS ${compile_flags})
+ elseif(NOT MSVC)
+ set_property(
+ SOURCE blake3_${feature}${suffix}
+ PROPERTY COMPILE_FLAGS ${compile_flags})
+ endif()
else()
string(TOUPPER "blake3_no_${feature}" no_feature)
target_compile_definitions(blake3 PRIVATE ${no_feature})
endif()
endfunction()
-add_source_if_enabled(sse2 "-msse2")
-add_source_if_enabled(sse41 "-msse4.1")
-add_source_if_enabled(avx2 "-mavx2")
-add_source_if_enabled(avx512 "-mavx512f -mavx512vl")
+# https://software.intel.com/sites/landingpage/IntrinsicsGuide/
+add_source_if_enabled(sse2 "" "-msse2"
+ "_mm_set1_epi32(42)")
+add_source_if_enabled(sse41 "" "-msse4.1"
+ "_mm_test_all_ones(_mm_set1_epi32(42))")
+add_source_if_enabled(avx2 "/arch:AVX2" "-mavx2"
+ "_mm256_abs_epi8(_mm256_set1_epi32(42))")
+add_source_if_enabled(avx512 "/arch:AVX512" "-mavx512f -mavx512vl"
+ "_mm256_abs_epi64(_mm256_set1_epi32(42))")
-# TODO: how to detect ARM NEON support?
-# If NEON, define BLAKE3_USE_NEON and build blake3_neon.c
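+# Neon is always available on AArch64. The pointer-size test below only limits
+# the probe to 64-bit targets; the arm_neon.h compile check is what decides
+# whether blake3_neon.c is built.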
+if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+ # https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics
+ check_c_source_compiles(
+ [=[
+ #include <arm_neon.h>
+ int main() { vdupq_n_s32(42); return 0; }
+ ]=]
+ HAVE_NEON)
+ if(HAVE_NEON)
+ target_sources(blake3 PRIVATE blake3_neon.c)
+ target_compile_definitions(blake3 PRIVATE BLAKE3_USE_NEON)
+ endif()
+endif()
#include "blake3.h"
#include "blake3_impl.h"
+const char * blake3_version(void) { /* Report the BLAKE3 version this code was built from. */
+ return BLAKE3_VERSION_STRING; /* expands to a string literal, so the pointer stays valid for the program's lifetime */
+}
+
INLINE void chunk_state_init(blake3_chunk_state *self, const uint32_t key[8],
uint8_t flags) {
memcpy(self->cv, key, BLAKE3_KEY_LEN);
extern "C" {
#endif
+#define BLAKE3_VERSION_STRING "0.3.7"
#define BLAKE3_KEY_LEN 32
#define BLAKE3_OUT_LEN 32
#define BLAKE3_BLOCK_LEN 64
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
} blake3_hasher;
+const char * blake3_version(void);
void blake3_hasher_init(blake3_hasher *self);
void blake3_hasher_init_keyed(blake3_hasher *self,
const uint8_t key[BLAKE3_KEY_LEN]);
--- /dev/null
+public _blake3_hash_many_avx2
+public blake3_hash_many_avx2
+
+_TEXT SEGMENT ALIGN(16) 'CODE'
+
+ALIGN 16
+blake3_hash_many_avx2 PROC
+_blake3_hash_many_avx2 PROC
+ push r15
+ push r14
+ push r13
+ push r12
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 880
+ and rsp, 0FFFFFFFFFFFFFFC0H
+ vmovdqa xmmword ptr [rsp+2D0H], xmm6
+ vmovdqa xmmword ptr [rsp+2E0H], xmm7
+ vmovdqa xmmword ptr [rsp+2F0H], xmm8
+ vmovdqa xmmword ptr [rsp+300H], xmm9
+ vmovdqa xmmword ptr [rsp+310H], xmm10
+ vmovdqa xmmword ptr [rsp+320H], xmm11
+ vmovdqa xmmword ptr [rsp+330H], xmm12
+ vmovdqa xmmword ptr [rsp+340H], xmm13
+ vmovdqa xmmword ptr [rsp+350H], xmm14
+ vmovdqa xmmword ptr [rsp+360H], xmm15
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, qword ptr [rbp+68H]
+ movzx r9, byte ptr [rbp+70H]
+ neg r9d
+ vmovd xmm0, r9d
+ vpbroadcastd ymm0, xmm0
+ vmovdqa ymmword ptr [rsp+260H], ymm0
+ vpand ymm1, ymm0, ymmword ptr [ADD0]
+ vpand ymm2, ymm0, ymmword ptr [ADD1]
+ vmovdqa ymmword ptr [rsp+2A0H], ymm2
+ vmovd xmm2, r8d
+ vpbroadcastd ymm2, xmm2
+ vpaddd ymm2, ymm2, ymm1
+ vmovdqa ymmword ptr [rsp+220H], ymm2
+ vpxor ymm1, ymm1, ymmword ptr [CMP_MSB_MASK]
+ vpxor ymm2, ymm2, ymmword ptr [CMP_MSB_MASK]
+ vpcmpgtd ymm2, ymm1, ymm2
+ shr r8, 32
+ vmovd xmm3, r8d
+ vpbroadcastd ymm3, xmm3
+ vpsubd ymm3, ymm3, ymm2
+ vmovdqa ymmword ptr [rsp+240H], ymm3
+ shl rdx, 6
+ mov qword ptr [rsp+2C0H], rdx
+ cmp rsi, 8
+ jc final7blocks
+outerloop8:
+ vpbroadcastd ymm0, dword ptr [rcx]
+ vpbroadcastd ymm1, dword ptr [rcx+4H]
+ vpbroadcastd ymm2, dword ptr [rcx+8H]
+ vpbroadcastd ymm3, dword ptr [rcx+0CH]
+ vpbroadcastd ymm4, dword ptr [rcx+10H]
+ vpbroadcastd ymm5, dword ptr [rcx+14H]
+ vpbroadcastd ymm6, dword ptr [rcx+18H]
+ vpbroadcastd ymm7, dword ptr [rcx+1CH]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ mov r12, qword ptr [rdi+20H]
+ mov r13, qword ptr [rdi+28H]
+ mov r14, qword ptr [rdi+30H]
+ mov r15, qword ptr [rdi+38H]
+ movzx eax, byte ptr [rbp+78H]
+ movzx ebx, byte ptr [rbp+80H]
+ or eax, ebx
+ xor edx, edx
+ALIGN 16
+innerloop8:
+ movzx ebx, byte ptr [rbp+88H]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+2C0H]
+ cmove eax, ebx
+ mov dword ptr [rsp+200H], eax
+ vmovups xmm8, xmmword ptr [r8+rdx-40H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-40H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-40H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-40H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-40H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-40H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-40H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-40H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+20H], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+40H], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+60H], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-30H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-30H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-30H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-30H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-30H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-30H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-30H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-30H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+80H], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0A0H], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0C0H], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0E0H], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-20H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-20H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-20H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-20H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-20H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-20H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-20H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-20H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+100H], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+120H], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+140H], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+160H], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-10H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-10H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-10H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-10H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-10H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-10H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-10H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-10H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+180H], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+1A0H], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+1C0H], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+1E0H], ymm11
+ vpbroadcastd ymm15, dword ptr [rsp+200H]
+ prefetcht0 byte ptr [r8+rdx+80H]
+ prefetcht0 byte ptr [r12+rdx+80H]
+ prefetcht0 byte ptr [r9+rdx+80H]
+ prefetcht0 byte ptr [r13+rdx+80H]
+ prefetcht0 byte ptr [r10+rdx+80H]
+ prefetcht0 byte ptr [r14+rdx+80H]
+ prefetcht0 byte ptr [r11+rdx+80H]
+ prefetcht0 byte ptr [r15+rdx+80H]
+ vpaddd ymm0, ymm0, ymmword ptr [rsp]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+40H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+80H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm0, ymmword ptr [rsp+220H]
+ vpxor ymm13, ymm1, ymmword ptr [rsp+240H]
+ vpxor ymm14, ymm2, ymmword ptr [BLAKE3_BLOCK_LEN]
+ vpxor ymm15, ymm3, ymm15
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [BLAKE3_IV_0]
+ vpaddd ymm9, ymm13, ymmword ptr [BLAKE3_IV_1]
+ vpaddd ymm10, ymm14, ymmword ptr [BLAKE3_IV_2]
+ vpaddd ymm11, ymm15, ymmword ptr [BLAKE3_IV_3]
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+20H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+60H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0A0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+100H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+140H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+180H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1C0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+120H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+160H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+1A0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+40H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+60H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0E0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+80H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0C0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+140H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1A0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+20H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+180H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+120H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+160H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+1C0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+100H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+60H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+140H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+1A0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+80H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+180H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+40H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1C0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0C0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+120H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+160H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+100H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0A0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+1E0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+20H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+140H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+180H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+1C0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1A0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0E0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+120H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+60H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+80H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+160H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0A0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+20H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+40H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+100H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+180H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+120H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+1E0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1C0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+1A0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+160H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+140H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+100H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0E0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+40H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+60H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+20H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+80H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+120H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+160H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+100H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+1C0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+180H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+20H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+1A0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+40H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+80H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+60H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+140H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0C0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+160H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+20H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+100H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+1E0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+120H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+1C0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+40H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+60H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+140H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+180H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+80H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1A0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vpxor ymm0, ymm0, ymm8
+ vpxor ymm1, ymm1, ymm9
+ vpxor ymm2, ymm2, ymm10
+ vpxor ymm3, ymm3, ymm11
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpxor ymm4, ymm4, ymm12
+ vpxor ymm5, ymm5, ymm13
+ vpxor ymm6, ymm6, ymm14
+ vpxor ymm7, ymm7, ymm15
+ movzx eax, byte ptr [rbp+78H]
+ jne innerloop8
+ mov rbx, qword ptr [rbp+90H] ; rbx = output pointer kept in the stack slot [rbp+90H]; below, ymm0-ymm7 are transposed and written out
+ vunpcklps ymm8, ymm0, ymm1 ; ymm0-ymm7 each hold one 32-bit word position for 8 lanes; start of the 8x8 dword transpose
+ vunpcklps ymm9, ymm2, ymm3
+ vunpckhps ymm10, ymm0, ymm1
+ vunpcklps ymm11, ymm4, ymm5
+ vunpcklps ymm0, ymm6, ymm7
+ vshufps ymm12, ymm8, ymm9, 78
+ vblendps ymm1, ymm8, ymm12, 0CCH ; ymm1 = words 0-3 of lanes 0 (low half) and 4 (high half)
+ vshufps ymm8, ymm11, ymm0, 78
+ vunpckhps ymm13, ymm2, ymm3
+ vblendps ymm2, ymm11, ymm8, 0CCH ; ymm2 = words 4-7 of lanes 0 and 4
+ vblendps ymm3, ymm12, ymm9, 0CCH ; ymm3 = words 0-3 of lanes 1 and 5
+ vperm2f128 ymm12, ymm1, ymm2, 20H ; join the low halves: all 8 words of lane 0
+ vmovups ymmword ptr [rbx], ymm12 ; result for lane 0
+ vunpckhps ymm14, ymm4, ymm5
+ vblendps ymm4, ymm8, ymm0, 0CCH ; ymm4 = words 4-7 of lanes 1 and 5
+ vunpckhps ymm15, ymm6, ymm7
+ vperm2f128 ymm7, ymm3, ymm4, 20H
+ vmovups ymmword ptr [rbx+20H], ymm7 ; result for lane 1
+ vshufps ymm5, ymm10, ymm13, 78
+ vblendps ymm6, ymm5, ymm13, 0CCH ; ymm6 = words 0-3 of lanes 3 and 7
+ vshufps ymm13, ymm14, ymm15, 78
+ vblendps ymm10, ymm10, ymm5, 0CCH ; ymm10 = words 0-3 of lanes 2 and 6
+ vblendps ymm14, ymm14, ymm13, 0CCH ; ymm14 = words 4-7 of lanes 2 and 6
+ vperm2f128 ymm8, ymm10, ymm14, 20H
+ vmovups ymmword ptr [rbx+40H], ymm8 ; result for lane 2
+ vblendps ymm15, ymm13, ymm15, 0CCH ; ymm15 = words 4-7 of lanes 3 and 7
+ vperm2f128 ymm13, ymm6, ymm15, 20H
+ vmovups ymmword ptr [rbx+60H], ymm13 ; result for lane 3
+ vperm2f128 ymm9, ymm1, ymm2, 31H ; 31H joins the high halves instead: lanes 4-7
+ vperm2f128 ymm11, ymm3, ymm4, 31H
+ vmovups ymmword ptr [rbx+80H], ymm9 ; result for lane 4
+ vperm2f128 ymm14, ymm10, ymm14, 31H
+ vperm2f128 ymm15, ymm6, ymm15, 31H
+ vmovups ymmword ptr [rbx+0A0H], ymm11 ; result for lane 5
+ vmovups ymmword ptr [rbx+0C0H], ymm14 ; result for lane 6
+ vmovups ymmword ptr [rbx+0E0H], ymm15 ; result for lane 7
+ vmovdqa ymm0, ymmword ptr [rsp+2A0H] ; per-lane increment vector (set up earlier in the routine)
+ vpaddd ymm1, ymm0, ymmword ptr [rsp+220H] ; add it to the low dwords of the per-lane 64-bit values (presumably the block counters)
+ vmovdqa ymmword ptr [rsp+220H], ymm1 ; store the new low dwords
+ vpxor ymm0, ymm0, ymmword ptr [CMP_MSB_MASK] ; flip via CMP_MSB_MASK (presumably the sign bits) so the signed compare below acts as unsigned
+ vpxor ymm2, ymm1, ymmword ptr [CMP_MSB_MASK]
+ vpcmpgtd ymm2, ymm0, ymm2 ; all-ones (-1) in lanes where increment > sum, i.e. the low dword wrapped around
+ vmovdqa ymm0, ymmword ptr [rsp+240H] ; high dwords
+ vpsubd ymm2, ymm0, ymm2 ; subtracting -1 propagates the carry into the high dword
+ vmovdqa ymmword ptr [rsp+240H], ymm2
+ add rdi, 64 ; step over 8 entries (8 bytes each) of the input-pointer array
+ add rbx, 256 ; step over the 8 x 32 bytes just written
+ mov qword ptr [rbp+90H], rbx ; save the advanced output pointer back to its stack slot
+ sub rsi, 8 ; 8 fewer inputs remain
+ cmp rsi, 8
+ jnc outerloop8 ; at least 8 left: run the 8-way path again
+ test rsi, rsi
+ jnz final7blocks ; 1-7 left: handle them in the tail path; otherwise fall through to unwind
+unwind: ; Common exit path: restore saved registers, drop the stack frame and return.
+ vzeroupper ; clear the upper halves of all ymm registers before returning (avoids AVX/SSE transition penalties in the caller)
+ vmovdqa xmm6, xmmword ptr [rsp+2D0H] ; reload xmm6-xmm15 from the save area at [rsp+2D0H]..[rsp+36FH];
+ vmovdqa xmm7, xmmword ptr [rsp+2E0H] ; these registers are non-volatile in the Microsoft x64 calling convention
+ vmovdqa xmm8, xmmword ptr [rsp+2F0H] ; (the matching saves are presumably done in the prologue)
+ vmovdqa xmm9, xmmword ptr [rsp+300H]
+ vmovdqa xmm10, xmmword ptr [rsp+310H]
+ vmovdqa xmm11, xmmword ptr [rsp+320H]
+ vmovdqa xmm12, xmmword ptr [rsp+330H]
+ vmovdqa xmm13, xmmword ptr [rsp+340H]
+ vmovdqa xmm14, xmmword ptr [rsp+350H]
+ vmovdqa xmm15, xmmword ptr [rsp+360H]
+ mov rsp, rbp ; discard the local frame; rbp holds the stack pointer value to pop from
+ pop rbp ; restore the pushed general-purpose registers (presumably in reverse push order)
+ pop rbx
+ pop rdi
+ pop rsi
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+ALIGN 16
+final7blocks: ; Tail path for leftover inputs (1-7 when entered from the 8-way loop), handled as groups of 4, 2 and 1.
+ mov rbx, qword ptr [rbp+90H] ; rbx = output pointer
+ mov r15, qword ptr [rsp+2C0H] ; r15 = byte offset at which each input ends (loop bound for rdx)
+ movzx r13d, byte ptr [rbp+78H] ; r13d = flag bits applied to every block (presumably the flags argument)
+ movzx r12d, byte ptr [rbp+88H] ; r12d = flag bits added only on the last block (presumably flags_end)
+ test rsi, 4H ; is bit 2 of the remaining count set?
+ je final3blocks ; no: skip the 4-input group
+ vbroadcasti128 ymm0, xmmword ptr [rcx] ; 32 bytes at rcx (presumably the key / chaining value) become state rows 0 and 1,
+ vbroadcasti128 ymm1, xmmword ptr [rcx+10H] ; duplicated in both 128-bit halves: one half per input
+ vmovdqa ymm8, ymm0 ; ymm0/ymm1 serve inputs 0 and 1, ymm8/ymm9 serve inputs 2 and 3
+ vmovdqa ymm9, ymm1
+ vbroadcasti128 ymm12, xmmword ptr [rsp+220H] ; low dwords of the per-lane 64-bit values (presumably counters) for lanes 0-3
+ vbroadcasti128 ymm13, xmmword ptr [rsp+240H] ; matching high dwords
+ vpunpckldq ymm14, ymm12, ymm13 ; interleave into (lo0, hi0, lo1, hi1) per half
+ vpunpckhdq ymm15, ymm12, ymm13 ; (lo2, hi2, lo3, hi3) per half
+ vpermq ymm14, ymm14, 50H ; low half = (lo0, hi0, lo0, hi0), high half = (lo1, hi1, lo1, hi1)
+ vpermq ymm15, ymm15, 50H ; same for lanes 2 and 3
+ vbroadcasti128 ymm12, xmmword ptr [BLAKE3_BLOCK_LEN]
+ vpblendd ymm14, ymm14, ymm12, 44H ; dword 2 of each half = block-length constant
+ vpblendd ymm15, ymm15, ymm12, 44H
+ vmovdqa ymmword ptr [rsp], ymm14 ; row-3 template for inputs 0/1 (dword 3 of each half is replaced by the flags per block)
+ vmovdqa ymmword ptr [rsp+20H], ymm15 ; row-3 template for inputs 2/3
+ mov r8, qword ptr [rdi] ; r8-r11 = the next four input pointers
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ movzx eax, byte ptr [rbp+80H] ; flag bits used only for the first block (presumably flags_start)
+ or eax, r13d ; eax = flags for the first block
+ xor edx, edx ; rdx = byte offset into the inputs
+ALIGN 16
+innerloop4: ; Per-block setup for the 4-input group: choose the flags, load one 64-byte block from each input, rebuild rows 2 and 3.
+ mov r14d, eax ; keep the flags without the last-block bits
+ or eax, r12d ; tentatively add the last-block bits
+ add rdx, 64 ; rdx = offset just past the block being processed
+ cmp rdx, r15 ; is this the last block of the inputs?
+ cmovne eax, r14d ; not the last block: drop the last-block bits again
+ mov dword ptr [rsp+200H], eax ; spill the flags so they can be broadcast from memory below
+ vmovups xmm2, xmmword ptr [r8+rdx-40H] ; 16-byte loads only: a 32-byte load at [ptr+rdx-10H] would read 16 bytes past the block
+ vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-40H], 01H ; (and past the end of the input on the last block); vinsertf128 fills the upper half anyway
+ vmovups xmm3, xmmword ptr [r8+rdx-30H] ; low half = input 0 (r8), high half = input 1 (r9)
+ vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-30H], 01H
+ vshufps ymm4, ymm2, ymm3, 136 ; even-indexed message words 0, 2, 4, 6
+ vshufps ymm5, ymm2, ymm3, 221 ; odd-indexed message words 1, 3, 5, 7
+ vmovups xmm2, xmmword ptr [r8+rdx-20H] ; second 32 bytes of the block
+ vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-20H], 01H
+ vmovups xmm3, xmmword ptr [r8+rdx-10H] ; last 16 bytes of the block; must not read beyond them
+ vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-10H], 01H
+ vshufps ymm6, ymm2, ymm3, 136 ; words 8, 10, 12, 14
+ vshufps ymm7, ymm2, ymm3, 221 ; words 9, 11, 13, 15
+ vpshufd ymm6, ymm6, 93H ; rotate dwords within each half: (14, 8, 10, 12)
+ vpshufd ymm7, ymm7, 93H ; (15, 9, 11, 13)
+ vmovups xmm10, xmmword ptr [r10+rdx-40H] ; same loading scheme for input 2 (r10, low half) and input 3 (r11, high half)
+ vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-40H], 01H
+ vmovups xmm11, xmmword ptr [r10+rdx-30H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-30H], 01H
+ vshufps ymm12, ymm10, ymm11, 136
+ vshufps ymm13, ymm10, ymm11, 221
+ vmovups xmm10, xmmword ptr [r10+rdx-20H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-20H], 01H
+ vmovups xmm11, xmmword ptr [r10+rdx-10H] ; last 16 bytes of the block; must not read beyond them
+ vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-10H], 01H
+ vshufps ymm14, ymm10, ymm11, 136
+ vshufps ymm15, ymm10, ymm11, 221
+ vpshufd ymm14, ymm14, 93H
+ vpshufd ymm15, ymm15, 93H
+ vpbroadcastd ymm2, dword ptr [rsp+200H] ; flags in every dword
+ vmovdqa ymm3, ymmword ptr [rsp] ; row-3 template for inputs 0/1
+ vmovdqa ymm11, ymmword ptr [rsp+20H] ; row-3 template for inputs 2/3
+ vpblendd ymm3, ymm3, ymm2, 88H ; dword 3 of each half = flags
+ vpblendd ymm11, ymm11, ymm2, 88H
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV] ; row 2 = IV constants for inputs 0/1
+ vmovdqa ymm10, ymm2 ; and for inputs 2/3
+ mov al, 7 ; round counter: roundloop4 runs 7 times
+roundloop4: ; One round for four inputs: ymm0-ymm3 are state rows 0-3 for inputs 0/1, ymm8-ymm11 the same for inputs 2/3.
+ vpaddd ymm0, ymm0, ymm4 ; column step, first half: row0 += first message vector
+ vpaddd ymm8, ymm8, ymm12
+ vmovdqa ymmword ptr [rsp+40H], ymm4 ; spill the first message vectors: ymm4 is reused as scratch below
+ nop ; padding; the reason is not evident from this code
+ vmovdqa ymmword ptr [rsp+60H], ymm12
+ nop
+ vpaddd ymm0, ymm0, ymm1 ; row0 += row1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0 ; row3 ^= row0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT16] ; byte-shuffle table (presumably rotates each dword by 16 bits)
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3 ; row2 += row3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2 ; row1 ^= row2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 12 ; row1 = row1 rotated right by 12 (shift right 12 | shift left 20)
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 12
+ vpslld ymm9, ymm9, 20
+ vpor ymm9, ymm9, ymm4
+ vpaddd ymm0, ymm0, ymm5 ; column step, second half: row0 += second message vector
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vmovdqa ymmword ptr [rsp+80H], ymm5 ; spill the second message vectors for the permutation at the end of the round
+ vmovdqa ymmword ptr [rsp+0A0H], ymm13
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT8] ; byte-shuffle table (presumably rotates each dword by 8 bits)
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 7 ; row1 = row1 rotated right by 7 (shift right 7 | shift left 25)
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 7
+ vpslld ymm9, ymm9, 25
+ vpor ymm9, ymm9, ymm4
+ vpshufd ymm0, ymm0, 93H ; rotate rows 0, 3 and 2 within each 128-bit half so the next step works on diagonals
+ vpshufd ymm8, ymm8, 93H
+ vpshufd ymm3, ymm3, 4EH
+ vpshufd ymm11, ymm11, 4EH
+ vpshufd ymm2, ymm2, 39H
+ vpshufd ymm10, ymm10, 39H
+ vpaddd ymm0, ymm0, ymm6 ; diagonal step, first half: row0 += third message vector
+ vpaddd ymm8, ymm8, ymm14
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT16]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 12 ; rotate right by 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 12
+ vpslld ymm9, ymm9, 20
+ vpor ymm9, ymm9, ymm4
+ vpaddd ymm0, ymm0, ymm7 ; diagonal step, second half: row0 += fourth message vector
+ vpaddd ymm8, ymm8, ymm15
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT8]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 7 ; rotate right by 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 7
+ vpslld ymm9, ymm9, 25
+ vpor ymm9, ymm9, ymm4
+ vpshufd ymm0, ymm0, 39H ; undo the row rotations (39H reverses 93H and vice versa, 4EH reverses itself)
+ vpshufd ymm8, ymm8, 39H
+ vpshufd ymm3, ymm3, 4EH
+ vpshufd ymm11, ymm11, 4EH
+ vpshufd ymm2, ymm2, 93H
+ vpshufd ymm10, ymm10, 93H
+ dec al ; one round done
+ je endroundloop4 ; all 7 rounds done: skip the message permutation
+ vmovdqa ymm4, ymmword ptr [rsp+40H] ; reload the spilled first/second message vectors of inputs 0/1
+ vmovdqa ymm5, ymmword ptr [rsp+80H]
+ vshufps ymm12, ymm4, ymm5, 214 ; permute the message words of inputs 0/1 for the next round, using ymm12/ymm13 as scratch
+ vpshufd ymm13, ymm4, 0FH
+ vpshufd ymm4, ymm12, 39H ; ymm4 = next round's first message vector
+ vshufps ymm12, ymm6, ymm7, 250
+ vpblendd ymm13, ymm13, ymm12, 0AAH ; ymm13 = next round's second message vector
+ vpunpcklqdq ymm12, ymm7, ymm5
+ vpblendd ymm12, ymm12, ymm6, 88H
+ vpshufd ymm12, ymm12, 78H ; ymm12 = next round's third message vector
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 1EH ; ymm7 = next round's fourth message vector
+ vmovdqa ymmword ptr [rsp+40H], ymm13 ; park the new second/third vectors: ymm5/ymm6 are scratch for inputs 2/3 next
+ vmovdqa ymmword ptr [rsp+80H], ymm12
+ vmovdqa ymm12, ymmword ptr [rsp+60H] ; reload the spilled first/second message vectors of inputs 2/3
+ vmovdqa ymm13, ymmword ptr [rsp+0A0H]
+ vshufps ymm5, ymm12, ymm13, 214 ; same permutation for inputs 2/3, using ymm5/ymm6 as scratch
+ vpshufd ymm6, ymm12, 0FH
+ vpshufd ymm12, ymm5, 39H ; ymm12 = next round's first message vector
+ vshufps ymm5, ymm14, ymm15, 250
+ vpblendd ymm6, ymm6, ymm5, 0AAH
+ vpunpcklqdq ymm5, ymm15, ymm13
+ vpblendd ymm5, ymm5, ymm14, 88H
+ vpshufd ymm5, ymm5, 78H
+ vpunpckhdq ymm13, ymm13, ymm15
+ vpunpckldq ymm14, ymm14, ymm13
+ vpshufd ymm15, ymm14, 1EH ; ymm15 = next round's fourth message vector
+ vmovdqa ymm13, ymm6 ; move the scratch results into place: second and third vectors for inputs 2/3
+ vmovdqa ymm14, ymm5
+ vmovdqa ymm5, ymmword ptr [rsp+40H] ; bring back the parked second/third vectors for inputs 0/1
+ vmovdqa ymm6, ymmword ptr [rsp+80H]
+ jmp roundloop4
+endroundloop4: ; After 7 rounds: fold the state, continue with the next block or store the 4 results and advance.
+ vpxor ymm0, ymm0, ymm2 ; new row 0 = row 0 ^ row 2
+ vpxor ymm1, ymm1, ymm3 ; new row 1 = row 1 ^ row 3
+ vpxor ymm8, ymm8, ymm10
+ vpxor ymm9, ymm9, ymm11
+ mov eax, r13d ; later blocks use only the base flags (the first-block bits are dropped)
+ cmp rdx, r15
+ jne innerloop4 ; more blocks in these inputs: ymm0/ymm1 and ymm8/ymm9 carry over as rows 0/1
+ vmovdqu xmmword ptr [rbx], xmm0 ; 32-byte result of input 0 (low halves of ymm0/ymm1)
+ vmovdqu xmmword ptr [rbx+10H], xmm1
+ vextracti128 xmmword ptr [rbx+20H], ymm0, 01H ; result of input 1 (high halves)
+ vextracti128 xmmword ptr [rbx+30H], ymm1, 01H
+ vmovdqu xmmword ptr [rbx+40H], xmm8 ; result of input 2
+ vmovdqu xmmword ptr [rbx+50H], xmm9
+ vextracti128 xmmword ptr [rbx+60H], ymm8, 01H ; result of input 3
+ vextracti128 xmmword ptr [rbx+70H], ymm9, 01H
+ vmovaps xmm8, xmmword ptr [rsp+260H] ; blend mask prepared earlier (presumably set when each input has its own counter)
+ vmovaps xmm0, xmmword ptr [rsp+220H] ; low dwords, lanes 0-3
+ vmovaps xmm1, xmmword ptr [rsp+230H] ; low dwords, lanes 4-7
+ vmovaps xmm2, xmmword ptr [rsp+240H] ; high dwords, lanes 0-3
+ vmovaps xmm3, xmmword ptr [rsp+250H] ; high dwords, lanes 4-7
+ vblendvps xmm0, xmm0, xmm1, xmm8 ; where the mask sign bit is set, take the lane 4-7 values
+ vblendvps xmm2, xmm2, xmm3, xmm8
+ vmovaps xmmword ptr [rsp+220H], xmm0 ; lanes 0-3 now hold the values for the next group of inputs
+ vmovaps xmmword ptr [rsp+240H], xmm2
+ add rbx, 128 ; 4 x 32 bytes of output written
+ add rdi, 32 ; 4 input pointers consumed
+ sub rsi, 4
+final3blocks: ; Tail path for a group of 2 inputs; both are processed together in one set of ymm registers.
+ test rsi, 2H ; is bit 1 of the remaining count set?
+ je final1blocks ; no: skip the 2-input group
+ vbroadcasti128 ymm0, xmmword ptr [rcx] ; state rows 0 and 1 from the 32 bytes at rcx (presumably the key / chaining value),
+ vbroadcasti128 ymm1, xmmword ptr [rcx+10H] ; duplicated in both halves: low half = input 0, high half = input 1
+ vmovd xmm13, dword ptr [rsp+220H] ; row-3 template for input 0: (low dword, high dword, block length, 0)
+ vpinsrd xmm13, xmm13, dword ptr [rsp+240H], 1
+ vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2
+ vmovd xmm14, dword ptr [rsp+224H] ; same for input 1, taken from lane 1 of the two vectors
+ vpinsrd xmm14, xmm14, dword ptr [rsp+244H], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2
+ vinserti128 ymm13, ymm13, xmm14, 01H ; ymm13 = both templates; dword 3 of each half receives the flags per block
+ vbroadcasti128 ymm14, xmmword ptr [ROT16] ; byte-shuffle tables stay in registers for the whole group
+ vbroadcasti128 ymm15, xmmword ptr [ROT8]
+ mov r8, qword ptr [rdi] ; r8, r9 = the next two input pointers
+ mov r9, qword ptr [rdi+8H]
+ movzx eax, byte ptr [rbp+80H] ; flag bits used only for the first block (presumably flags_start)
+ or eax, r13d ; eax = flags for the first block
+ xor edx, edx ; rdx = byte offset into the inputs
+ALIGN 16
+innerloop2: ; Per-block setup for the 2-input group: choose the flags, rebuild rows 2 and 3, load one 64-byte block per input.
+ mov r14d, eax ; keep the flags without the last-block bits
+ or eax, r12d ; tentatively add the last-block bits
+ add rdx, 64 ; rdx = offset just past the block being processed
+ cmp rdx, r15 ; is this the last block of the inputs?
+ cmovne eax, r14d ; not the last block: drop the last-block bits again
+ mov dword ptr [rsp+200H], eax ; spill the flags so they can be broadcast from memory
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV] ; row 2 = IV constants
+ vpbroadcastd ymm8, dword ptr [rsp+200H]
+ vpblendd ymm3, ymm13, ymm8, 88H ; row 3 = template from ymm13 with dword 3 of each half = flags
+ vmovups xmm8, xmmword ptr [r8+rdx-40H] ; 16-byte loads only: a 32-byte load at [r8+rdx-10H] would read 16 bytes past the block
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-40H], 01H ; (and past the end of the input on the last block); vinsertf128 fills the upper half anyway
+ vmovups xmm9, xmmword ptr [r8+rdx-30H] ; low half = input 0 (r8), high half = input 1 (r9)
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-30H], 01H
+ vshufps ymm4, ymm8, ymm9, 136 ; even-indexed message words 0, 2, 4, 6
+ vshufps ymm5, ymm8, ymm9, 221 ; odd-indexed message words 1, 3, 5, 7
+ vmovups xmm8, xmmword ptr [r8+rdx-20H] ; second 32 bytes of the block
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-20H], 01H
+ vmovups xmm9, xmmword ptr [r8+rdx-10H] ; last 16 bytes of the block; must not read beyond them
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-10H], 01H
+ vshufps ymm6, ymm8, ymm9, 136 ; words 8, 10, 12, 14
+ vshufps ymm7, ymm8, ymm9, 221 ; words 9, 11, 13, 15
+ vpshufd ymm6, ymm6, 93H ; rotate dwords within each half: (14, 8, 10, 12)
+ vpshufd ymm7, ymm7, 93H ; (15, 9, 11, 13)
+ mov al, 7 ; round counter: roundloop2 runs 7 times
+roundloop2: ; One round for two inputs: ymm0-ymm3 are state rows 0-3 (one input per 128-bit half), ymm4-ymm7 the message vectors.
+ vpaddd ymm0, ymm0, ymm4 ; column step, first half: row0 += first message vector
+ vpaddd ymm0, ymm0, ymm1 ; row0 += row1
+ vpxor ymm3, ymm3, ymm0 ; row3 ^= row0
+ vpshufb ymm3, ymm3, ymm14 ; byte shuffle with the ROT16 table (presumably a 16-bit rotate of each dword)
+ vpaddd ymm2, ymm2, ymm3 ; row2 += row3
+ vpxor ymm1, ymm1, ymm2 ; row1 ^= row2
+ vpsrld ymm8, ymm1, 12 ; row1 = row1 rotated right by 12 (shift right 12 | shift left 20)
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm8
+ vpaddd ymm0, ymm0, ymm5 ; column step, second half: row0 += second message vector
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm15 ; byte shuffle with the ROT8 table (presumably an 8-bit rotate of each dword)
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 7 ; row1 = row1 rotated right by 7 (shift right 7 | shift left 25)
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm8
+ vpshufd ymm0, ymm0, 93H ; rotate rows 0, 3 and 2 within each half so the next step works on diagonals
+ vpshufd ymm3, ymm3, 4EH
+ vpshufd ymm2, ymm2, 39H
+ vpaddd ymm0, ymm0, ymm6 ; diagonal step, first half: row0 += third message vector
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm14
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 12 ; rotate right by 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm8
+ vpaddd ymm0, ymm0, ymm7 ; diagonal step, second half: row0 += fourth message vector
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm15
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 7 ; rotate right by 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm8
+ vpshufd ymm0, ymm0, 39H ; undo the row rotations (39H reverses 93H and vice versa, 4EH reverses itself)
+ vpshufd ymm3, ymm3, 4EH
+ vpshufd ymm2, ymm2, 93H
+ dec al ; one round done
+ jz endroundloop2 ; all 7 rounds done: skip the message permutation
+ vshufps ymm8, ymm4, ymm5, 214 ; permute the message words for the next round, using ymm8/ymm9 as scratch
+ vpshufd ymm9, ymm4, 0FH
+ vpshufd ymm4, ymm8, 39H ; ymm4 = next round's first message vector
+ vshufps ymm8, ymm6, ymm7, 250
+ vpblendd ymm9, ymm9, ymm8, 0AAH ; ymm9 = next round's second message vector
+ vpunpcklqdq ymm8, ymm7, ymm5
+ vpblendd ymm8, ymm8, ymm6, 88H
+ vpshufd ymm8, ymm8, 78H ; ymm8 = next round's third message vector
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 1EH ; ymm7 = next round's fourth message vector
+ vmovdqa ymm5, ymm9 ; move the scratch results into place
+ vmovdqa ymm6, ymm8
+ jmp roundloop2
+endroundloop2:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop2
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+10H], xmm1
+ vextracti128 xmmword ptr [rbx+20H], ymm0, 01H
+ vextracti128 xmmword ptr [rbx+30H], ymm1, 01H
+ vmovaps ymm8, ymmword ptr [rsp+260H]
+ vmovaps ymm0, ymmword ptr [rsp+220H]
+ vmovups ymm1, ymmword ptr [rsp+228H]
+ vmovaps ymm2, ymmword ptr [rsp+240H]
+ vmovups ymm3, ymmword ptr [rsp+248H]
+ vblendvps ymm0, ymm0, ymm1, ymm8
+ vblendvps ymm2, ymm2, ymm3, ymm8
+ vmovaps ymmword ptr [rsp+220H], ymm0
+ vmovaps ymmword ptr [rsp+240H], ymm2
+ add rbx, 64
+ add rdi, 16
+ sub rsi, 2
+final1blocks:
+ test rsi, 1H
+ je unwind
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+10H]
+ vmovd xmm3, dword ptr [rsp+220H]
+ vpinsrd xmm3, xmm3, dword ptr [rsp+240H], 1
+ vpinsrd xmm13, xmm3, dword ptr [BLAKE3_BLOCK_LEN], 2
+ vmovdqa xmm14, xmmword ptr [ROT16]
+ vmovdqa xmm15, xmmword ptr [ROT8]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+ALIGN 16
+innerloop1:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ vmovdqa xmm2, xmmword ptr [BLAKE3_IV]
+ vmovdqa xmm3, xmm13
+ vpinsrd xmm3, xmm3, eax, 3
+ vmovups xmm8, xmmword ptr [r8+rdx-40H]
+ vmovups xmm9, xmmword ptr [r8+rdx-30H]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-20H]
+ vmovups xmm9, xmmword ptr [r8+rdx-10H]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 93H
+ vpshufd xmm7, xmm7, 93H
+ mov al, 7
+roundloop1:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm14
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 12
+ vpslld xmm1, xmm1, 20
+ vpor xmm1, xmm1, xmm8
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm15
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 7
+ vpslld xmm1, xmm1, 25
+ vpor xmm1, xmm1, xmm8
+ vpshufd xmm0, xmm0, 93H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 39H
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm14
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 12
+ vpslld xmm1, xmm1, 20
+ vpor xmm1, xmm1, xmm8
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm15
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 7
+ vpslld xmm1, xmm1, 25
+ vpor xmm1, xmm1, xmm8
+ vpshufd xmm0, xmm0, 39H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 93H
+ dec al
+ jz endroundloop1
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0FH
+ vpshufd xmm4, xmm8, 39H
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0AAH
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 88H
+ vpshufd xmm8, xmm8, 78H
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 1EH
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp roundloop1
+endroundloop1:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop1
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+10H], xmm1
+ jmp unwind
+
+_blake3_hash_many_avx2 ENDP
+blake3_hash_many_avx2 ENDP
+_TEXT ENDS
+
+_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST' ; read-only constant tables referenced by the code above
+ALIGN 64 ; 64-byte base: with the table sizes below, ROT16/ROT8/BLAKE3_IV sit at offsets 224/240/288, keeping them 16-byte aligned for their vmovdqa xmm loads
+ADD0: ; dwords 0..7 (use site is outside this section; presumably per-lane counter offsets -- TODO confirm)
+ dd 0, 1, 2, 3, 4, 5, 6, 7
+
+ADD1: ; the value 8 in each of eight dwords (use site is outside this section)
+ dd 8 dup (8)
+
+BLAKE3_IV_0: ; first word of BLAKE3_IV (below) repeated in eight dwords
+ dd 8 dup (6A09E667H)
+
+BLAKE3_IV_1: ; second word of BLAKE3_IV repeated in eight dwords
+ dd 8 dup (0BB67AE85H)
+
+BLAKE3_IV_2: ; third word of BLAKE3_IV repeated in eight dwords
+ dd 8 dup (3C6EF372H)
+
+BLAKE3_IV_3: ; fourth word of BLAKE3_IV repeated in eight dwords
+ dd 8 dup (0A54FF53AH)
+
+BLAKE3_BLOCK_LEN: ; 64 in each of eight dwords; final1blocks inserts the first dword into element 2 of xmm13
+ dd 8 dup (64)
+
+ROT16: ; vpshufb byte indices that swap the 16-bit halves of every dword (rotate by 16); loaded into xmm14
+ db 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
+
+ROT8: ; vpshufb byte indices that rotate every dword right by 8 bits; loaded into xmm15
+ db 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
+
+CMP_MSB_MASK: ; only the top bit set in each of eight dwords (use site is outside this section)
+ dd 8 dup(80000000H)
+
+BLAKE3_IV: ; the four IV words once each; innerloop1 loads them into xmm2 at the start of every block
+ dd 6A09E667H, 0BB67AE85H, 3C6EF372H, 0A54FF53AH
+
+_RDATA ENDS
+END
--- /dev/null
+public _blake3_hash_many_avx512
+public blake3_hash_many_avx512
+public blake3_compress_in_place_avx512
+public _blake3_compress_in_place_avx512
+public blake3_compress_xof_avx512
+public _blake3_compress_xof_avx512
+
+_TEXT SEGMENT ALIGN(16) 'CODE'
+
+ALIGN 16
+blake3_hash_many_avx512 PROC
+_blake3_hash_many_avx512 PROC
+ push r15
+ push r14
+ push r13
+ push r12
+ push rdi
+ push rsi
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 304
+ and rsp, 0FFFFFFFFFFFFFFC0H
+ vmovdqa xmmword ptr [rsp+90H], xmm6
+ vmovdqa xmmword ptr [rsp+0A0H], xmm7
+ vmovdqa xmmword ptr [rsp+0B0H], xmm8
+ vmovdqa xmmword ptr [rsp+0C0H], xmm9
+ vmovdqa xmmword ptr [rsp+0D0H], xmm10
+ vmovdqa xmmword ptr [rsp+0E0H], xmm11
+ vmovdqa xmmword ptr [rsp+0F0H], xmm12
+ vmovdqa xmmword ptr [rsp+100H], xmm13
+ vmovdqa xmmword ptr [rsp+110H], xmm14
+ vmovdqa xmmword ptr [rsp+120H], xmm15
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, qword ptr [rbp+68H]
+ movzx r9, byte ptr [rbp+70H]
+ neg r9
+ kmovw k1, r9d
+ vmovd xmm0, r8d
+ vpbroadcastd ymm0, xmm0
+ shr r8, 32
+ vmovd xmm1, r8d
+ vpbroadcastd ymm1, xmm1
+ vmovdqa ymm4, ymm1
+ vmovdqa ymm5, ymm1
+ vpaddd ymm2, ymm0, ymmword ptr [ADD0]
+ vpaddd ymm3, ymm0, ymmword ptr [ADD0+32]
+ vpcmpud k2, ymm2, ymm0, 1
+ vpcmpud k3, ymm3, ymm0, 1
+ ; XXX: ml64.exe does not currently understand the syntax. We use a workaround.
+ vpbroadcastd ymm6, dword ptr [ADD1]
+ vpaddd ymm4 {k2}, ymm4, ymm6
+ vpaddd ymm5 {k3}, ymm5, ymm6
+ ; vpaddd ymm4 {k2}, ymm4, dword ptr [ADD1] {1to8}
+ ; vpaddd ymm5 {k3}, ymm5, dword ptr [ADD1] {1to8}
+ knotw k2, k1
+ vmovdqa32 ymm2 {k2}, ymm0
+ vmovdqa32 ymm3 {k2}, ymm0
+ vmovdqa32 ymm4 {k2}, ymm1
+ vmovdqa32 ymm5 {k2}, ymm1
+ vmovdqa ymmword ptr [rsp], ymm2
+ vmovdqa ymmword ptr [rsp+20H], ymm3
+ vmovdqa ymmword ptr [rsp+40H], ymm4
+ vmovdqa ymmword ptr [rsp+60H], ymm5
+ shl rdx, 6
+ mov qword ptr [rsp+80H], rdx
+ cmp rsi, 16
+ jc final15blocks
+outerloop16:
+ vpbroadcastd zmm0, dword ptr [rcx]
+ vpbroadcastd zmm1, dword ptr [rcx+1H*4H]
+ vpbroadcastd zmm2, dword ptr [rcx+2H*4H]
+ vpbroadcastd zmm3, dword ptr [rcx+3H*4H]
+ vpbroadcastd zmm4, dword ptr [rcx+4H*4H]
+ vpbroadcastd zmm5, dword ptr [rcx+5H*4H]
+ vpbroadcastd zmm6, dword ptr [rcx+6H*4H]
+ vpbroadcastd zmm7, dword ptr [rcx+7H*4H]
+ movzx eax, byte ptr [rbp+78H]
+ movzx ebx, byte ptr [rbp+80H]
+ or eax, ebx
+ xor edx, edx
+ALIGN 16
+innerloop16:
+ movzx ebx, byte ptr [rbp+88H]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+80H]
+ cmove eax, ebx
+ mov dword ptr [rsp+88H], eax
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ mov r12, qword ptr [rdi+40H]
+ mov r13, qword ptr [rdi+48H]
+ mov r14, qword ptr [rdi+50H]
+ mov r15, qword ptr [rdi+58H]
+ vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H]
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
+ vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H]
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
+ vpunpcklqdq zmm8, zmm16, zmm17
+ vpunpckhqdq zmm9, zmm16, zmm17
+ vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H]
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
+ vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H]
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
+ vpunpcklqdq zmm10, zmm18, zmm19
+ vpunpckhqdq zmm11, zmm18, zmm19
+ mov r8, qword ptr [rdi+20H]
+ mov r9, qword ptr [rdi+28H]
+ mov r10, qword ptr [rdi+30H]
+ mov r11, qword ptr [rdi+38H]
+ mov r12, qword ptr [rdi+60H]
+ mov r13, qword ptr [rdi+68H]
+ mov r14, qword ptr [rdi+70H]
+ mov r15, qword ptr [rdi+78H]
+ vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H]
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
+ vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H]
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
+ vpunpcklqdq zmm12, zmm16, zmm17
+ vpunpckhqdq zmm13, zmm16, zmm17
+ vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H]
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
+ vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H]
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
+ vpunpcklqdq zmm14, zmm18, zmm19
+ vpunpckhqdq zmm15, zmm18, zmm19
+ vmovdqa32 zmm27, zmmword ptr [INDEX0]
+ vmovdqa32 zmm31, zmmword ptr [INDEX1]
+ vshufps zmm16, zmm8, zmm10, 136
+ vshufps zmm17, zmm12, zmm14, 136
+ vmovdqa32 zmm20, zmm16
+ vpermt2d zmm16, zmm27, zmm17
+ vpermt2d zmm20, zmm31, zmm17
+ vshufps zmm17, zmm8, zmm10, 221
+ vshufps zmm30, zmm12, zmm14, 221
+ vmovdqa32 zmm21, zmm17
+ vpermt2d zmm17, zmm27, zmm30
+ vpermt2d zmm21, zmm31, zmm30
+ vshufps zmm18, zmm9, zmm11, 136
+ vshufps zmm8, zmm13, zmm15, 136
+ vmovdqa32 zmm22, zmm18
+ vpermt2d zmm18, zmm27, zmm8
+ vpermt2d zmm22, zmm31, zmm8
+ vshufps zmm19, zmm9, zmm11, 221
+ vshufps zmm8, zmm13, zmm15, 221
+ vmovdqa32 zmm23, zmm19
+ vpermt2d zmm19, zmm27, zmm8
+ vpermt2d zmm23, zmm31, zmm8
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ mov r12, qword ptr [rdi+40H]
+ mov r13, qword ptr [rdi+48H]
+ mov r14, qword ptr [rdi+50H]
+ mov r15, qword ptr [rdi+58H]
+ vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
+ vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
+ vpunpcklqdq zmm8, zmm24, zmm25
+ vpunpckhqdq zmm9, zmm24, zmm25
+ vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
+ vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
+ vpunpcklqdq zmm10, zmm24, zmm25
+ vpunpckhqdq zmm11, zmm24, zmm25
+ prefetcht0 byte ptr [r8+rdx+80H]
+ prefetcht0 byte ptr [r12+rdx+80H]
+ prefetcht0 byte ptr [r9+rdx+80H]
+ prefetcht0 byte ptr [r13+rdx+80H]
+ prefetcht0 byte ptr [r10+rdx+80H]
+ prefetcht0 byte ptr [r14+rdx+80H]
+ prefetcht0 byte ptr [r11+rdx+80H]
+ prefetcht0 byte ptr [r15+rdx+80H]
+ mov r8, qword ptr [rdi+20H]
+ mov r9, qword ptr [rdi+28H]
+ mov r10, qword ptr [rdi+30H]
+ mov r11, qword ptr [rdi+38H]
+ mov r12, qword ptr [rdi+60H]
+ mov r13, qword ptr [rdi+68H]
+ mov r14, qword ptr [rdi+70H]
+ mov r15, qword ptr [rdi+78H]
+ vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
+ vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
+ vpunpcklqdq zmm12, zmm24, zmm25
+ vpunpckhqdq zmm13, zmm24, zmm25
+ vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
+ vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
+ vpunpcklqdq zmm14, zmm24, zmm25
+ vpunpckhqdq zmm15, zmm24, zmm25
+ prefetcht0 byte ptr [r8+rdx+80H]
+ prefetcht0 byte ptr [r12+rdx+80H]
+ prefetcht0 byte ptr [r9+rdx+80H]
+ prefetcht0 byte ptr [r13+rdx+80H]
+ prefetcht0 byte ptr [r10+rdx+80H]
+ prefetcht0 byte ptr [r14+rdx+80H]
+ prefetcht0 byte ptr [r11+rdx+80H]
+ prefetcht0 byte ptr [r15+rdx+80H]
+ vshufps zmm24, zmm8, zmm10, 136
+ vshufps zmm30, zmm12, zmm14, 136
+ vmovdqa32 zmm28, zmm24
+ vpermt2d zmm24, zmm27, zmm30
+ vpermt2d zmm28, zmm31, zmm30
+ vshufps zmm25, zmm8, zmm10, 221
+ vshufps zmm30, zmm12, zmm14, 221
+ vmovdqa32 zmm29, zmm25
+ vpermt2d zmm25, zmm27, zmm30
+ vpermt2d zmm29, zmm31, zmm30
+ vshufps zmm26, zmm9, zmm11, 136
+ vshufps zmm8, zmm13, zmm15, 136
+ vmovdqa32 zmm30, zmm26
+ vpermt2d zmm26, zmm27, zmm8
+ vpermt2d zmm30, zmm31, zmm8
+ vshufps zmm8, zmm9, zmm11, 221
+ vshufps zmm10, zmm13, zmm15, 221
+ vpermi2d zmm27, zmm8, zmm10
+ vpermi2d zmm31, zmm8, zmm10
+ vpbroadcastd zmm8, dword ptr [BLAKE3_IV_0]
+ vpbroadcastd zmm9, dword ptr [BLAKE3_IV_1]
+ vpbroadcastd zmm10, dword ptr [BLAKE3_IV_2]
+ vpbroadcastd zmm11, dword ptr [BLAKE3_IV_3]
+ vmovdqa32 zmm12, zmmword ptr [rsp]
+ vmovdqa32 zmm13, zmmword ptr [rsp+1H*40H]
+ vpbroadcastd zmm14, dword ptr [BLAKE3_BLOCK_LEN]
+ vpbroadcastd zmm15, dword ptr [rsp+22H*4H]
+ vpaddd zmm0, zmm0, zmm16
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm20
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm17
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm21
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm24
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm28
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm25
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm29
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm18
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm23
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm22
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm16
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm17
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm25
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm27
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm30
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm19
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm29
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm20
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm18
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm22
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm27
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm21
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm31
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm26
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm30
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm23
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm19
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm20
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm21
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm16
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm24
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm28
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm31
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm29
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm26
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm23
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm16
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm18
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm17
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm25
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm24
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm30
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm28
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm29
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm18
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm19
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm22
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm27
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm17
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm31
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm25
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm30
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm19
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm26
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm20
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpxord zmm0, zmm0, zmm8
+ vpxord zmm1, zmm1, zmm9
+ vpxord zmm2, zmm2, zmm10
+ vpxord zmm3, zmm3, zmm11
+ vpxord zmm4, zmm4, zmm12
+ vpxord zmm5, zmm5, zmm13
+ vpxord zmm6, zmm6, zmm14
+ vpxord zmm7, zmm7, zmm15
+ movzx eax, byte ptr [rbp+78H]
+ jne innerloop16
+ mov rbx, qword ptr [rbp+90H]
+ vpunpckldq zmm16, zmm0, zmm1
+ vpunpckhdq zmm17, zmm0, zmm1
+ vpunpckldq zmm18, zmm2, zmm3
+ vpunpckhdq zmm19, zmm2, zmm3
+ vpunpckldq zmm20, zmm4, zmm5
+ vpunpckhdq zmm21, zmm4, zmm5
+ vpunpckldq zmm22, zmm6, zmm7
+ vpunpckhdq zmm23, zmm6, zmm7
+ vpunpcklqdq zmm0, zmm16, zmm18
+ vpunpckhqdq zmm1, zmm16, zmm18
+ vpunpcklqdq zmm2, zmm17, zmm19
+ vpunpckhqdq zmm3, zmm17, zmm19
+ vpunpcklqdq zmm4, zmm20, zmm22
+ vpunpckhqdq zmm5, zmm20, zmm22
+ vpunpcklqdq zmm6, zmm21, zmm23
+ vpunpckhqdq zmm7, zmm21, zmm23
+ vshufi32x4 zmm16, zmm0, zmm4, 88H
+ vshufi32x4 zmm17, zmm1, zmm5, 88H
+ vshufi32x4 zmm18, zmm2, zmm6, 88H
+ vshufi32x4 zmm19, zmm3, zmm7, 88H
+ vshufi32x4 zmm20, zmm0, zmm4, 0DDH
+ vshufi32x4 zmm21, zmm1, zmm5, 0DDH
+ vshufi32x4 zmm22, zmm2, zmm6, 0DDH
+ vshufi32x4 zmm23, zmm3, zmm7, 0DDH
+ vshufi32x4 zmm0, zmm16, zmm17, 88H
+ vshufi32x4 zmm1, zmm18, zmm19, 88H
+ vshufi32x4 zmm2, zmm20, zmm21, 88H
+ vshufi32x4 zmm3, zmm22, zmm23, 88H
+ vshufi32x4 zmm4, zmm16, zmm17, 0DDH
+ vshufi32x4 zmm5, zmm18, zmm19, 0DDH
+ vshufi32x4 zmm6, zmm20, zmm21, 0DDH
+ vshufi32x4 zmm7, zmm22, zmm23, 0DDH
+ vmovdqu32 zmmword ptr [rbx], zmm0
+ vmovdqu32 zmmword ptr [rbx+1H*40H], zmm1
+ vmovdqu32 zmmword ptr [rbx+2H*40H], zmm2
+ vmovdqu32 zmmword ptr [rbx+3H*40H], zmm3
+ vmovdqu32 zmmword ptr [rbx+4H*40H], zmm4
+ vmovdqu32 zmmword ptr [rbx+5H*40H], zmm5
+ vmovdqu32 zmmword ptr [rbx+6H*40H], zmm6
+ vmovdqu32 zmmword ptr [rbx+7H*40H], zmm7
+ vmovdqa32 zmm0, zmmword ptr [rsp]
+ vmovdqa32 zmm1, zmmword ptr [rsp+1H*40H]
+ vmovdqa32 zmm2, zmm0
+ ; XXX: ml64.exe does not currently understand the embedded-broadcast ({1to16}) memory-operand syntax, so ADD16/ADD1 are broadcast into zmm4/zmm5 first as a workaround.
+ vpbroadcastd zmm4, dword ptr [ADD16]
+ vpbroadcastd zmm5, dword ptr [ADD1]
+ vpaddd zmm2{k1}, zmm0, zmm4
+ ; vpaddd zmm2{k1}, zmm0, dword ptr [ADD16] ; {1to16}
+ vpcmpud k2, zmm2, zmm0, 1
+ vpaddd zmm1 {k2}, zmm1, zmm5
+ ; vpaddd zmm1 {k2}, zmm1, dword ptr [ADD1] ; {1to16}
+ vmovdqa32 zmmword ptr [rsp], zmm2
+ vmovdqa32 zmmword ptr [rsp+1H*40H], zmm1
+ add rdi, 128
+ add rbx, 512
+ mov qword ptr [rbp+90H], rbx
+ sub rsi, 16
+ cmp rsi, 16
+ jnc outerloop16
+ test rsi, rsi
+ jne final15blocks
+unwind: ; Common exit path: restore the saved vector and general-purpose registers, then return.
+ vzeroupper ; zero the upper bits of the wide vector registers before returning
+ vmovdqa xmm6, xmmword ptr [rsp+90H] ; reload xmm6..xmm15 from the 16-byte slots rsp+90H..rsp+120H
+ vmovdqa xmm7, xmmword ptr [rsp+0A0H] ; (presumably spilled there by the prologue, which is outside this excerpt;
+ vmovdqa xmm8, xmmword ptr [rsp+0B0H] ; xmm6..xmm15 are nonvolatile in the Microsoft x64 calling convention)
+ vmovdqa xmm9, xmmword ptr [rsp+0C0H]
+ vmovdqa xmm10, xmmword ptr [rsp+0D0H]
+ vmovdqa xmm11, xmmword ptr [rsp+0E0H]
+ vmovdqa xmm12, xmmword ptr [rsp+0F0H]
+ vmovdqa xmm13, xmmword ptr [rsp+100H]
+ vmovdqa xmm14, xmmword ptr [rsp+110H]
+ vmovdqa xmm15, xmmword ptr [rsp+120H]
+ mov rsp, rbp ; drop the scratch frame: rsp takes the value kept in rbp (presumably set up by the prologue)
+ pop rbp ; pop the saved general-purpose registers: rbp, rbx, rsi, rdi, r12..r15
+ pop rbx
+ pop rsi
+ pop rdi
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+ALIGN 16
+final15blocks: ; Tail path taken when rsi (remaining input count) is non-zero but below 16; first an optional 8-way pass.
+ test esi, 8H ; bit 3 of the remaining count set, i.e. at least 8 inputs left?
+ je final7blocks ; no: skip the 8-way pass
+ vpbroadcastd ymm0, dword ptr [rcx] ; broadcast the eight 32-bit words at [rcx]..[rcx+1CH] into ymm0..ymm7,
+ vpbroadcastd ymm1, dword ptr [rcx+4H] ; one word per register, replicated across all 8 lanes
+ vpbroadcastd ymm2, dword ptr [rcx+8H] ; (presumably rcx points at the key / initial chaining value - confirm against the prologue)
+ vpbroadcastd ymm3, dword ptr [rcx+0CH]
+ vpbroadcastd ymm4, dword ptr [rcx+10H]
+ vpbroadcastd ymm5, dword ptr [rcx+14H]
+ vpbroadcastd ymm6, dword ptr [rcx+18H]
+ vpbroadcastd ymm7, dword ptr [rcx+1CH]
+ mov r8, qword ptr [rdi] ; load eight 8-byte pointers from the array at rdi into r8..r15;
+ mov r9, qword ptr [rdi+8H] ; they are used below as the base addresses of the eight inputs
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ mov r12, qword ptr [rdi+20H]
+ mov r13, qword ptr [rdi+28H]
+ mov r14, qword ptr [rdi+30H]
+ mov r15, qword ptr [rdi+38H]
+ movzx eax, byte ptr [rbp+78H] ; eax = byte at [rbp+78H] OR byte at [rbp+80H]
+ movzx ebx, byte ptr [rbp+80H] ; (presumably the flags and flags_start stack arguments - confirm)
+ or eax, ebx
+ xor edx, edx ; rdx = byte offset into each input, starting at 0
+innerloop8: ; One iteration per 64-byte block: compress the current block of 8 inputs in parallel (one input per 32-bit lane).
+ movzx ebx, byte ptr [rbp+88H] ; ebx = eax OR byte at [rbp+88H] (presumably flags_end)
+ or ebx, eax
+ add rdx, 64 ; advance the offset; rdx now points just past the current block
+ cmp rdx, qword ptr [rsp+80H] ; equal to the bound stored at [rsp+80H]? (presumably bytes per input); flags are reused by the jne at the loop bottom
+ cmove eax, ebx ; on the last block use the value with the [rbp+88H] bits OR-ed in
+ mov dword ptr [rsp+88H], eax ; stash this block's flag word; it is broadcast into ymm15 below
+ vmovups xmm8, xmmword ptr [r8+rdx-40H] ; bytes 0..15 of the block: pair inputs r8/r12, r9/r13, r10/r14, r11/r15 in low/high 128-bit halves
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-40H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-40H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-40H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-40H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-40H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-40H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-40H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm16, ymm12, ymm14, 136 ; transposed result: ymm16..ymm19 = message words 0..3, one input per lane
+ vshufps ymm17, ymm12, ymm14, 221
+ vshufps ymm18, ymm13, ymm15, 136
+ vshufps ymm19, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-30H] ; bytes 16..31 of the block, same load/transpose pattern
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-30H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-30H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-30H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-30H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-30H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-30H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-30H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm20, ymm12, ymm14, 136 ; ymm20..ymm23 = message words 4..7
+ vshufps ymm21, ymm12, ymm14, 221
+ vshufps ymm22, ymm13, ymm15, 136
+ vshufps ymm23, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-20H] ; bytes 32..47 of the block
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-20H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-20H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-20H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-20H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-20H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-20H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-20H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm24, ymm12, ymm14, 136 ; ymm24..ymm27 = message words 8..11
+ vshufps ymm25, ymm12, ymm14, 221
+ vshufps ymm26, ymm13, ymm15, 136
+ vshufps ymm27, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-10H] ; bytes 48..63 of the block
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-10H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-10H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-10H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-10H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-10H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-10H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-10H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm28, ymm12, ymm14, 136 ; ymm28..ymm31 = message words 12..15
+ vshufps ymm29, ymm12, ymm14, 221
+ vshufps ymm30, ymm13, ymm15, 136
+ vshufps ymm31, ymm13, ymm15, 221
+ vpbroadcastd ymm8, dword ptr [BLAKE3_IV_0] ; state words 8..11 (ymm8..ymm11) = IV constants
+ vpbroadcastd ymm9, dword ptr [BLAKE3_IV_1]
+ vpbroadcastd ymm10, dword ptr [BLAKE3_IV_2]
+ vpbroadcastd ymm11, dword ptr [BLAKE3_IV_3]
+ vmovdqa ymm12, ymmword ptr [rsp] ; state word 12 = per-lane vector kept at [rsp] (presumably counter low words)
+ vmovdqa ymm13, ymmword ptr [rsp+40H] ; state word 13 = per-lane vector kept at [rsp+40H] (presumably counter high words)
+ vpbroadcastd ymm14, dword ptr [BLAKE3_BLOCK_LEN] ; state word 14 = block length constant
+ vpbroadcastd ymm15, dword ptr [rsp+88H] ; state word 15 = flag word stored at the top of the loop
+ vpaddd ymm0, ymm0, ymm16 ; ---- round 1, column step: words 0..3 += message words (m0, m2, m4, m6)
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm20
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm4 ; words 0..3 += words 4..7
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0 ; words 12..15 ^= words 0..3
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16 ; rotate words 12..15 right by 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12 ; words 8..11 += words 12..15
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8 ; words 4..7 ^= words 8..11
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12 ; rotate words 4..7 right by 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm17 ; second half of the column step: add (m1, m3, m5, m7), same pattern with rotations 8 and 7
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm21
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm24 ; ---- round 1, diagonal step: same mixing, but word 0 pairs with 5/10/15, word 1 with 6/11/12, etc.
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm28
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm25
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm29
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm18 ; ---- round 2, column step (the message permutation is applied by picking different ymm16..ymm31 registers)
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm23
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm22
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm16
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm17 ; ---- round 2, diagonal step
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm25
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm27
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm30
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm19 ; ---- round 3, column step
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm29
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm20
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm18
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm22 ; ---- round 3, diagonal step
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm27
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm21
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm31
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm26 ; ---- round 4, column step
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm30
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm23
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm19
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm20 ; ---- round 4, diagonal step
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm21
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm16
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm24
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm28 ; ---- round 5, column step
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm31
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm29
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm26
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm23 ; ---- round 5, diagonal step
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm16
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm18
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm17
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm25 ; ---- round 6, column step
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm24
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm30
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm28
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm29 ; ---- round 6, diagonal step
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm18
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm19
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm22
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm27 ; ---- round 7 (last), column step
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm17
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm31
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm25
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm30 ; ---- round 7, diagonal step
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm19
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm26
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm20
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpxor ymm0, ymm0, ymm8 ; fold the state: words 0..7 ^= words 8..15; ymm0..ymm7 carry into the next block
+ vpxor ymm1, ymm1, ymm9
+ vpxor ymm2, ymm2, ymm10
+ vpxor ymm3, ymm3, ymm11
+ vpxor ymm4, ymm4, ymm12
+ vpxor ymm5, ymm5, ymm13
+ vpxor ymm6, ymm6, ymm14
+ vpxor ymm7, ymm7, ymm15
+ movzx eax, byte ptr [rbp+78H] ; later blocks start from the [rbp+78H] byte alone (the [rbp+80H] bits were only OR-ed in before the first block)
+ jne innerloop8 ; flags still come from the cmp at the loop top: no instruction in between writes RFLAGS
+ mov rbx, qword ptr [rbp+90H] ; rbx = pointer kept at [rbp+90H] (destination of the stores below; presumably the output cursor)
+ vunpcklps ymm8, ymm0, ymm1 ; transpose ymm0..ymm7 back to per-input order: each 32-byte store below holds words 0..7 of one input
+ vunpcklps ymm9, ymm2, ymm3
+ vunpckhps ymm10, ymm0, ymm1
+ vunpcklps ymm11, ymm4, ymm5
+ vunpcklps ymm0, ymm6, ymm7
+ vshufps ymm12, ymm8, ymm9, 78
+ vblendps ymm1, ymm8, ymm12, 0CCH
+ vshufps ymm8, ymm11, ymm0, 78
+ vunpckhps ymm13, ymm2, ymm3
+ vblendps ymm2, ymm11, ymm8, 0CCH
+ vblendps ymm3, ymm12, ymm9, 0CCH
+ vperm2f128 ymm12, ymm1, ymm2, 20H
+ vmovups ymmword ptr [rbx], ymm12
+ vunpckhps ymm14, ymm4, ymm5
+ vblendps ymm4, ymm8, ymm0, 0CCH
+ vunpckhps ymm15, ymm6, ymm7
+ vperm2f128 ymm7, ymm3, ymm4, 20H
+ vmovups ymmword ptr [rbx+20H], ymm7
+ vshufps ymm5, ymm10, ymm13, 78
+ vblendps ymm6, ymm5, ymm13, 0CCH
+ vshufps ymm13, ymm14, ymm15, 78
+ vblendps ymm10, ymm10, ymm5, 0CCH
+ vblendps ymm14, ymm14, ymm13, 0CCH
+ vperm2f128 ymm8, ymm10, ymm14, 20H
+ vmovups ymmword ptr [rbx+40H], ymm8
+ vblendps ymm15, ymm13, ymm15, 0CCH
+ vperm2f128 ymm13, ymm6, ymm15, 20H
+ vmovups ymmword ptr [rbx+60H], ymm13
+ vperm2f128 ymm9, ymm1, ymm2, 31H
+ vperm2f128 ymm11, ymm3, ymm4, 31H
+ vmovups ymmword ptr [rbx+80H], ymm9
+ vperm2f128 ymm14, ymm10, ymm14, 31H
+ vperm2f128 ymm15, ymm6, ymm15, 31H
+ vmovups ymmword ptr [rbx+0A0H], ymm11
+ vmovups ymmword ptr [rbx+0C0H], ymm14
+ vmovups ymmword ptr [rbx+0E0H], ymm15
+ vmovdqa ymm0, ymmword ptr [rsp] ; per-lane vectors at [rsp] and [rsp+40H]: in lanes selected by k1 (set outside this excerpt),
+ vmovdqa ymm2, ymmword ptr [rsp+40H] ; replace the low eight dwords with the following eight (at +20H / +60H)
+ vmovdqa32 ymm0 {k1}, ymmword ptr [rsp+1H*20H]
+ vmovdqa32 ymm2 {k1}, ymmword ptr [rsp+3H*20H]
+ vmovdqa ymmword ptr [rsp], ymm0
+ vmovdqa ymmword ptr [rsp+40H], ymm2
+ add rbx, 256 ; 8 outputs x 32 bytes were written
+ mov qword ptr [rbp+90H], rbx ; save the advanced pointer back to [rbp+90H]
+ add rdi, 64 ; consumed eight 8-byte input pointers
+ sub rsi, 8 ; eight fewer inputs remaining; execution falls through to final7blocks
+final7blocks: ; Tail path for the remaining inputs after the optional 8-way pass; sets up an optional 4-way pass (one input per 128-bit lane).
+ mov rbx, qword ptr [rbp+90H] ; rbx = pointer kept at [rbp+90H] (used as the store destination after the loop)
+ mov r15, qword ptr [rsp+80H] ; r15 = bound that the block offset rdx is compared against
+ movzx r13, byte ptr [rbp+78H] ; r13 = byte at [rbp+78H] (presumably the base flags)
+ movzx r12, byte ptr [rbp+88H] ; r12 = byte at [rbp+88H] (presumably flags_end)
+ test esi, 4H ; bit 2 of the remaining count set?
+ je final3blocks ; no: skip the 4-way pass
+ vbroadcasti32x4 zmm0, xmmword ptr [rcx] ; zmm0 = 16 bytes at [rcx] copied into all four 128-bit lanes (state row 0)
+ vbroadcasti32x4 zmm1, xmmword ptr [rcx+1H*10H] ; zmm1 = the next 16 bytes, likewise (state row 1)
+ vmovdqa xmm12, xmmword ptr [rsp] ; first four dwords of the per-lane vector at [rsp] (presumably counter low words)
+ vmovdqa xmm13, xmmword ptr [rsp+40H] ; first four dwords of the vector at [rsp+40H] (presumably counter high words)
+ vpunpckldq xmm14, xmm12, xmm13 ; interleave into ([rsp] dword, [rsp+40H] dword) pairs for inputs 0 and 1
+ vpunpckhdq xmm15, xmm12, xmm13 ; ... and for inputs 2 and 3
+ vpermq ymm14, ymm14, 0DCH ; spread the pairs so each 128-bit lane holds one pair in its low 64 bits
+ vpermq ymm15, ymm15, 0DCH
+ vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN]
+ vinserti64x4 zmm13, zmm14, ymm15, 01H ; zmm13 = the four pairs, one per 128-bit lane
+ mov eax, 17476 ; 4444H: mask selecting dword 2 of every 128-bit lane
+ kmovw k2, eax
+ vpblendmd zmm13 {k2}, zmm13, zmm12 ; put the block length constant into dword 2 of each lane
+ vbroadcasti32x4 zmm15, xmmword ptr [BLAKE3_IV] ; zmm15 = 16 bytes at BLAKE3_IV in every lane (copied into state row 2 each block)
+ mov r8, qword ptr [rdi] ; load four input pointers from the array at rdi
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ mov eax, 43690 ; 0AAAAH: mask selecting the odd dwords (used in the round loop's message shuffle)
+ kmovw k3, eax
+ mov eax, 34952 ; 8888H: mask selecting dword 3 of every 128-bit lane
+ kmovw k4, eax
+ movzx eax, byte ptr [rbp+80H] ; eax = byte at [rbp+80H] OR r13d: flag word for the first block
+ or eax, r13d
+ xor edx, edx ; rdx = byte offset into each input, starting at 0
+ALIGN 16
+innerloop4: ; One iteration per 64-byte block: load the block of 4 inputs (one per 128-bit lane) and set up the state rows for the round loop.
+ mov r14d, eax ; r14d = flag word without the r12d bits
+ or eax, r12d
+ add rdx, 64 ; advance the offset; rdx now points just past the current block
+ cmp rdx, r15
+ cmovne eax, r14d ; not the last block: drop the r12d bits again
+ mov dword ptr [rsp+88H], eax ; stash this block's flag word
+ vmovdqa32 zmm2, zmm15 ; state row 2 = copy of zmm15 (loaded from BLAKE3_IV before the loop)
+ vpbroadcastd zmm8, dword ptr [rsp+22H*4H] ; 22H*4H = 88H: broadcast the flag word just stored
+ vpblendmd zmm3 {k4}, zmm13, zmm8 ; state row 3 = zmm13 with the flag word in the dwords selected by k4
+ vmovups xmm8, xmmword ptr [r8+rdx-1H*40H] ; 128-bit load: only lane 0 is kept from this input, lanes 1..3 come from the inserts below
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-4H*10H], 01H
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-4H*10H], 02H
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-4H*10H], 03H
+ vmovups xmm9, xmmword ptr [r8+rdx-30H] ; 128-bit load: a 64-byte load here would read 16 bytes past the current block
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-3H*10H], 01H
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-3H*10H], 02H
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-3H*10H], 03H
+ vshufps zmm4, zmm8, zmm9, 136 ; per lane: even message words (m0, m2, m4, m6)
+ vshufps zmm5, zmm8, zmm9, 221 ; per lane: odd message words (m1, m3, m5, m7)
+ vmovups xmm8, xmmword ptr [r8+rdx-20H] ; 128-bit load: a 64-byte load here would read 32 bytes past the current block
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-2H*10H], 01H
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-2H*10H], 02H
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-2H*10H], 03H
+ vmovups xmm9, xmmword ptr [r8+rdx-10H] ; 128-bit load: a 64-byte load here would read 48 bytes past the current block
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-1H*10H], 01H
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-1H*10H], 02H
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-1H*10H], 03H
+ vshufps zmm6, zmm8, zmm9, 136 ; per lane: (m8, m10, m12, m14)
+ vshufps zmm7, zmm8, zmm9, 221 ; per lane: (m9, m11, m13, m15)
+ vpshufd zmm6, zmm6, 93H ; rotate each lane by one dword: (m14, m8, m10, m12)
+ vpshufd zmm7, zmm7, 93H ; likewise: (m15, m9, m11, m13)
+ mov al, 7 ; round counter for roundloop4 (eax's flag word is already saved at [rsp+88H])
+roundloop4:
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 16
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 12
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 8
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 7
+ vpshufd zmm0, zmm0, 93H
+ vpshufd zmm3, zmm3, 4EH
+ vpshufd zmm2, zmm2, 39H
+ vpaddd zmm0, zmm0, zmm6
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 16
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 12
+ vpaddd zmm0, zmm0, zmm7
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 8
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 7
+ vpshufd zmm0, zmm0, 39H
+ vpshufd zmm3, zmm3, 4EH
+ vpshufd zmm2, zmm2, 93H
+ dec al
+ jz endroundloop4
+ vshufps zmm8, zmm4, zmm5, 214
+ vpshufd zmm9, zmm4, 0FH
+ vpshufd zmm4, zmm8, 39H
+ vshufps zmm8, zmm6, zmm7, 250
+ vpblendmd zmm9 {k3}, zmm9, zmm8
+ vpunpcklqdq zmm8, zmm7, zmm5
+ vpblendmd zmm8 {k4}, zmm8, zmm6
+ vpshufd zmm8, zmm8, 78H
+ vpunpckhdq zmm5, zmm5, zmm7
+ vpunpckldq zmm6, zmm6, zmm5
+ vpshufd zmm7, zmm6, 1EH
+ vmovdqa32 zmm5, zmm9
+ vmovdqa32 zmm6, zmm8
+ jmp roundloop4
+endroundloop4:
+ vpxord zmm0, zmm0, zmm2
+ vpxord zmm1, zmm1, zmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop4
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+10H], xmm1
+ vextracti128 xmmword ptr [rbx+20H], ymm0, 01H
+ vextracti128 xmmword ptr [rbx+30H], ymm1, 01H
+ vextracti32x4 xmmword ptr [rbx+4H*10H], zmm0, 02H
+ vextracti32x4 xmmword ptr [rbx+5H*10H], zmm1, 02H
+ vextracti32x4 xmmword ptr [rbx+6H*10H], zmm0, 03H
+ vextracti32x4 xmmword ptr [rbx+7H*10H], zmm1, 03H
+ vmovdqa xmm0, xmmword ptr [rsp]
+ vmovdqa xmm2, xmmword ptr [rsp+40H]
+ vmovdqa32 xmm0 {k1}, xmmword ptr [rsp+1H*10H]
+ vmovdqa32 xmm2 {k1}, xmmword ptr [rsp+5H*10H]
+ vmovdqa xmmword ptr [rsp], xmm0
+ vmovdqa xmmword ptr [rsp+40H], xmm2
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+final3blocks:
+ test esi, 2H
+ je final1block
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+10H]
+ vmovd xmm13, dword ptr [rsp]
+ vpinsrd xmm13, xmm13, dword ptr [rsp+40H], 1
+ vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2
+ vmovd xmm14, dword ptr [rsp+4H]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+44H], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2
+ vinserti128 ymm13, ymm13, xmm14, 01H
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+ALIGN 16
+innerloop2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+88H], eax
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV]
+ vpbroadcastd ymm8, dword ptr [rsp+88H]
+ vpblendd ymm3, ymm13, ymm8, 88H
+ vmovups ymm8, ymmword ptr [r8+rdx-40H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-40H], 01H
+ vmovups ymm9, ymmword ptr [r8+rdx-30H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-30H], 01H
+ vshufps ymm4, ymm8, ymm9, 136
+ vshufps ymm5, ymm8, ymm9, 221
+ vmovups ymm8, ymmword ptr [r8+rdx-20H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-20H], 01H
+ vmovups ymm9, ymmword ptr [r8+rdx-10H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-10H], 01H
+ vshufps ymm6, ymm8, ymm9, 136
+ vshufps ymm7, ymm8, ymm9, 221
+ vpshufd ymm6, ymm6, 93H
+ vpshufd ymm7, ymm7, 93H
+ mov al, 7
+roundloop2:
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 16
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 12
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 8
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 7
+ vpshufd ymm0, ymm0, 93H
+ vpshufd ymm3, ymm3, 4EH
+ vpshufd ymm2, ymm2, 39H
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 16
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 12
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 8
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 7
+ vpshufd ymm0, ymm0, 39H
+ vpshufd ymm3, ymm3, 4EH
+ vpshufd ymm2, ymm2, 93H
+ dec al
+ jz endroundloop2
+ vshufps ymm8, ymm4, ymm5, 214
+ vpshufd ymm9, ymm4, 0FH
+ vpshufd ymm4, ymm8, 39H
+ vshufps ymm8, ymm6, ymm7, 250
+ vpblendd ymm9, ymm9, ymm8, 0AAH
+ vpunpcklqdq ymm8, ymm7, ymm5
+ vpblendd ymm8, ymm8, ymm6, 88H
+ vpshufd ymm8, ymm8, 78H
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 1EH
+ vmovdqa ymm5, ymm9
+ vmovdqa ymm6, ymm8
+ jmp roundloop2
+endroundloop2:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop2
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+10H], xmm1
+ vextracti128 xmmword ptr [rbx+20H], ymm0, 01H
+ vextracti128 xmmword ptr [rbx+30H], ymm1, 01H
+ vmovdqa xmm0, xmmword ptr [rsp]
+ vmovdqa xmm2, xmmword ptr [rsp+40H]
+ vmovdqu32 xmm0 {k1}, xmmword ptr [rsp+8H]
+ vmovdqu32 xmm2 {k1}, xmmword ptr [rsp+48H]
+ vmovdqa xmmword ptr [rsp], xmm0
+ vmovdqa xmmword ptr [rsp+40H], xmm2
+ add rbx, 64
+ add rdi, 16
+ sub rsi, 2
+final1block:
+ test esi, 1H
+ je unwind
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+10H]
+ vmovd xmm14, dword ptr [rsp]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+40H], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2
+ vmovdqa xmm15, xmmword ptr [BLAKE3_IV]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+ALIGN 16
+innerloop1:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ vpinsrd xmm3, xmm14, eax, 3
+ vmovdqa xmm2, xmm15
+ vmovups xmm8, xmmword ptr [r8+rdx-40H]
+ vmovups xmm9, xmmword ptr [r8+rdx-30H]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-20H]
+ vmovups xmm9, xmmword ptr [r8+rdx-10H]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 93H
+ vpshufd xmm7, xmm7, 93H
+ mov al, 7
+roundloop1:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 93H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 39H
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 39H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 93H
+ dec al
+ jz endroundloop1
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0FH
+ vpshufd xmm4, xmm8, 39H
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0AAH
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 88H
+ vpshufd xmm8, xmm8, 78H
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 1EH
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp roundloop1
+endroundloop1:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop1
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+10H], xmm1
+ jmp unwind
+
+_blake3_hash_many_avx512 ENDP
+blake3_hash_many_avx512 ENDP
+
+ALIGN 16
+blake3_compress_in_place_avx512 PROC ; Compress one 64-byte block into the 32-byte chaining value at [rcx], overwriting it in place.
+_blake3_compress_in_place_avx512 PROC ; Second label at the same address (underscore-prefixed name).
+ sub rsp, 72 ; 64 bytes to spill xmm6-xmm9, +8 so rsp is 16-byte aligned for vmovdqa (assumes rsp = 8 mod 16 at entry)
+ vmovdqa xmmword ptr [rsp], xmm6 ; xmm6-xmm9 are clobbered below; saved here and restored before ret
+ vmovdqa xmmword ptr [rsp+10H], xmm7
+ vmovdqa xmmword ptr [rsp+20H], xmm8
+ vmovdqa xmmword ptr [rsp+30H], xmm9
+ vmovdqu xmm0, xmmword ptr [rcx] ; row0 = cv[0..3] (rcx = 1st argument, cv)
+ vmovdqu xmm1, xmmword ptr [rcx+10H] ; row1 = cv[4..7]
+ movzx eax, byte ptr [rsp+70H] ; flags byte: stack-passed 5th argument, 70H = 72 (frame) + 8 (return address) + 32 (register home area)
+ movzx r8d, r8b ; keep only the low byte of r8 (block_len, 3rd argument)
+ shl rax, 32
+ add r8, rax ; r8 = block_len | (flags << 32)
+ vmovd xmm3, r9 ; low qword = 64-bit counter (r9, 4th argument); NOTE(review): relies on MASM assembling vmovd with a 64-bit register as a 64-bit move
+ vmovd xmm4, r8
+ vpunpcklqdq xmm3, xmm3, xmm4 ; row3 = [counter_lo, counter_hi, block_len, flags]
+ vmovaps xmm2, xmmword ptr [BLAKE3_IV] ; row2 = the four IV words (aligned load)
+ vmovups xmm8, xmmword ptr [rdx] ; message words m0..m3 (rdx = 2nd argument, block; unaligned loads)
+ vmovups xmm9, xmmword ptr [rdx+10H] ; m4..m7
+ vshufps xmm4, xmm8, xmm9, 136 ; xmm4 = m0, m2, m4, m6 (even-indexed words)
+ vshufps xmm5, xmm8, xmm9, 221 ; xmm5 = m1, m3, m5, m7 (odd-indexed words)
+ vmovups xmm8, xmmword ptr [rdx+20H] ; m8..m11
+ vmovups xmm9, xmmword ptr [rdx+30H] ; m12..m15
+ vshufps xmm6, xmm8, xmm9, 136 ; m8, m10, m12, m14
+ vshufps xmm7, xmm8, xmm9, 221 ; m9, m11, m13, m15
+ vpshufd xmm6, xmm6, 93H ; rotate lanes: xmm6 = m14, m8, m10, m12
+ vpshufd xmm7, xmm7, 93H ; rotate lanes: xmm7 = m15, m9, m11, m13
+ mov al, 7 ; round counter: 7 rounds (flags in rax are no longer needed, already merged into r8)
+@@: ; ---- top of round loop ----
+ vpaddd xmm0, xmm0, xmm4 ; first half-round, step 1: row0 += xmm4
+ vpaddd xmm0, xmm0, xmm1 ; row0 += row1
+ vpxord xmm3, xmm3, xmm0 ; row3 ^= row0
+ vprord xmm3, xmm3, 16 ; rotate each dword right by 16
+ vpaddd xmm2, xmm2, xmm3 ; row2 += row3
+ vpxord xmm1, xmm1, xmm2 ; row1 ^= row2
+ vprord xmm1, xmm1, 12 ; rotate each dword right by 12
+ vpaddd xmm0, xmm0, xmm5 ; first half-round, step 2: row0 += xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8 ; rotate each dword right by 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7 ; rotate each dword right by 7
+ vpshufd xmm0, xmm0, 93H ; rotate lanes of row0, row3, row2 by different amounts so the
+ vpshufd xmm3, xmm3, 4EH ; second half-round mixes diagonals of the 4x4 state
+ vpshufd xmm2, xmm2, 39H ; (row1 is left in place)
+ vpaddd xmm0, xmm0, xmm6 ; second half-round, step 1: row0 += xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7 ; second half-round, step 2: row0 += xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 39H ; inverse lane rotations: put rows 0, 3, 2 back in
+ vpshufd xmm3, xmm3, 4EH ; their original lane order
+ vpshufd xmm2, xmm2, 93H
+ dec al
+ jz @F ; 7th round finished: skip the message reshuffle and go finalize
+ vshufps xmm8, xmm4, xmm5, 214 ; reshuffle the message words held in xmm4-xmm7 into the
+ vpshufd xmm9, xmm4, 0FH ; order consumed by the next round; xmm8/xmm9 are scratch
+ vpshufd xmm4, xmm8, 39H ; next round's xmm4
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0AAH ; xmm9 = next round's xmm5 (copied below)
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 88H
+ vpshufd xmm8, xmm8, 78H ; xmm8 = next round's xmm6 (copied below)
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 1EH ; next round's xmm7
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp @B ; next round
+@@: ; ---- finalize ----
+ vpxor xmm0, xmm0, xmm2 ; new cv[0..3] = row0 ^ row2
+ vpxor xmm1, xmm1, xmm3 ; new cv[4..7] = row1 ^ row3
+ vmovdqu xmmword ptr [rcx], xmm0 ; write the 32-byte result back over the caller's cv
+ vmovdqu xmmword ptr [rcx+10H], xmm1
+ vmovdqa xmm6, xmmword ptr [rsp] ; restore the saved xmm6-xmm9
+ vmovdqa xmm7, xmmword ptr [rsp+10H]
+ vmovdqa xmm8, xmmword ptr [rsp+20H]
+ vmovdqa xmm9, xmmword ptr [rsp+30H]
+ add rsp, 72 ; release the spill area
+ ret
+_blake3_compress_in_place_avx512 ENDP
+blake3_compress_in_place_avx512 ENDP
+
+ALIGN 16
+blake3_compress_xof_avx512 PROC ; Compress one 64-byte block and write the full 64-byte state to the output buffer; the cv at [rcx] is only read.
+_blake3_compress_xof_avx512 PROC ; Second label at the same address (underscore-prefixed name).
+ sub rsp, 72 ; 64 bytes to spill xmm6-xmm9, +8 so rsp is 16-byte aligned for vmovdqa (assumes rsp = 8 mod 16 at entry)
+ vmovdqa xmmword ptr [rsp], xmm6 ; xmm6-xmm9 are clobbered below; saved here and restored before ret
+ vmovdqa xmmword ptr [rsp+10H], xmm7
+ vmovdqa xmmword ptr [rsp+20H], xmm8
+ vmovdqa xmmword ptr [rsp+30H], xmm9
+ vmovdqu xmm0, xmmword ptr [rcx] ; row0 = cv[0..3] (rcx = 1st argument, cv)
+ vmovdqu xmm1, xmmword ptr [rcx+10H] ; row1 = cv[4..7]
+ movzx eax, byte ptr [rsp+70H] ; flags byte: stack-passed 5th argument, 70H = 72 (frame) + 8 (return address) + 32 (register home area)
+ movzx r8d, r8b ; keep only the low byte of r8 (block_len, 3rd argument)
+ mov r10, qword ptr [rsp+78H] ; r10 = output pointer: stack-passed 6th argument, one slot above flags
+ shl rax, 32
+ add r8, rax ; r8 = block_len | (flags << 32)
+ vmovd xmm3, r9 ; low qword = 64-bit counter (r9, 4th argument); NOTE(review): relies on MASM assembling vmovd with a 64-bit register as a 64-bit move
+ vmovd xmm4, r8
+ vpunpcklqdq xmm3, xmm3, xmm4 ; row3 = [counter_lo, counter_hi, block_len, flags]
+ vmovaps xmm2, xmmword ptr [BLAKE3_IV] ; row2 = the four IV words (aligned load)
+ vmovups xmm8, xmmword ptr [rdx] ; message words m0..m3 (rdx = 2nd argument, block; unaligned loads)
+ vmovups xmm9, xmmword ptr [rdx+10H] ; m4..m7
+ vshufps xmm4, xmm8, xmm9, 136 ; xmm4 = m0, m2, m4, m6 (even-indexed words)
+ vshufps xmm5, xmm8, xmm9, 221 ; xmm5 = m1, m3, m5, m7 (odd-indexed words)
+ vmovups xmm8, xmmword ptr [rdx+20H] ; m8..m11
+ vmovups xmm9, xmmword ptr [rdx+30H] ; m12..m15
+ vshufps xmm6, xmm8, xmm9, 136 ; m8, m10, m12, m14
+ vshufps xmm7, xmm8, xmm9, 221 ; m9, m11, m13, m15
+ vpshufd xmm6, xmm6, 93H ; rotate lanes: xmm6 = m14, m8, m10, m12
+ vpshufd xmm7, xmm7, 93H ; rotate lanes: xmm7 = m15, m9, m11, m13
+ mov al, 7 ; round counter: 7 rounds (flags in rax are no longer needed, already merged into r8)
+@@: ; ---- top of round loop ----
+ vpaddd xmm0, xmm0, xmm4 ; first half-round, step 1: row0 += xmm4
+ vpaddd xmm0, xmm0, xmm1 ; row0 += row1
+ vpxord xmm3, xmm3, xmm0 ; row3 ^= row0
+ vprord xmm3, xmm3, 16 ; rotate each dword right by 16
+ vpaddd xmm2, xmm2, xmm3 ; row2 += row3
+ vpxord xmm1, xmm1, xmm2 ; row1 ^= row2
+ vprord xmm1, xmm1, 12 ; rotate each dword right by 12
+ vpaddd xmm0, xmm0, xmm5 ; first half-round, step 2: row0 += xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8 ; rotate each dword right by 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7 ; rotate each dword right by 7
+ vpshufd xmm0, xmm0, 93H ; rotate lanes of row0, row3, row2 by different amounts so the
+ vpshufd xmm3, xmm3, 4EH ; second half-round mixes diagonals of the 4x4 state
+ vpshufd xmm2, xmm2, 39H ; (row1 is left in place)
+ vpaddd xmm0, xmm0, xmm6 ; second half-round, step 1: row0 += xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7 ; second half-round, step 2: row0 += xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 39H ; inverse lane rotations: put rows 0, 3, 2 back in
+ vpshufd xmm3, xmm3, 4EH ; their original lane order
+ vpshufd xmm2, xmm2, 93H
+ dec al
+ jz @F ; 7th round finished: skip the message reshuffle and go finalize
+ vshufps xmm8, xmm4, xmm5, 214 ; reshuffle the message words held in xmm4-xmm7 into the
+ vpshufd xmm9, xmm4, 0FH ; order consumed by the next round; xmm8/xmm9 are scratch
+ vpshufd xmm4, xmm8, 39H ; next round's xmm4
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0AAH ; xmm9 = next round's xmm5 (copied below)
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 88H
+ vpshufd xmm8, xmm8, 78H ; xmm8 = next round's xmm6 (copied below)
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 1EH ; next round's xmm7
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp @B ; next round
+@@: ; ---- finalize: produce all 64 output bytes ----
+ vpxor xmm0, xmm0, xmm2 ; out[0..15] = row0 ^ row2
+ vpxor xmm1, xmm1, xmm3 ; out[16..31] = row1 ^ row3
+ vpxor xmm2, xmm2, xmmword ptr [rcx] ; out[32..47] = row2 ^ input cv[0..3] (cv is still unmodified)
+ vpxor xmm3, xmm3, xmmword ptr [rcx+10H] ; out[48..63] = row3 ^ input cv[4..7]
+ vmovdqu xmmword ptr [r10], xmm0 ; unaligned stores to the caller's output buffer
+ vmovdqu xmmword ptr [r10+10H], xmm1
+ vmovdqu xmmword ptr [r10+20H], xmm2
+ vmovdqu xmmword ptr [r10+30H], xmm3
+ vmovdqa xmm6, xmmword ptr [rsp] ; restore the saved xmm6-xmm9
+ vmovdqa xmm7, xmmword ptr [rsp+10H]
+ vmovdqa xmm8, xmmword ptr [rsp+20H]
+ vmovdqa xmm9, xmmword ptr [rsp+30H]
+ add rsp, 72 ; release the spill area
+ ret
+_blake3_compress_xof_avx512 ENDP
+blake3_compress_xof_avx512 ENDP
+
+_TEXT ENDS
+
+_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST' ; read-only constant data for the AVX-512 routines above
+ALIGN 64 ; start the tables on a 64-byte boundary
+INDEX0: ; 16 dword indices; use site is outside this view - presumably a two-source permute selector (values >= 16 picking from the second source), TODO confirm
+ dd 0, 1, 2, 3, 16, 17, 18, 19
+ dd 8, 9, 10, 11, 24, 25, 26, 27
+INDEX1: ; companion table to INDEX0 covering the remaining indices (4-7, 20-23, 12-15, 28-31)
+ dd 4, 5, 6, 7, 20, 21, 22, 23
+ dd 12, 13, 14, 15, 28, 29, 30, 31
+ADD0: ; the lane numbers 0..15; presumably per-lane counter offsets - use site is outside this view, TODO confirm
+ dd 0, 1, 2, 3, 4, 5, 6, 7
+ dd 8, 9, 10, 11, 12, 13, 14, 15
+ADD1: ; single dword constant 1 (use site outside this view)
+ dd 1
+ADD16: ; single dword constant 16 (use site outside this view)
+ dd 16
+BLAKE3_BLOCK_LEN: ; block length in bytes; read as a single dword (broadcast / vpinsrd) by the hash_many tail paths
+ dd 64
+ALIGN 64 ; BLAKE3_IV is read with aligned 128-bit loads (vmovaps / vmovdqa), so it needs at least 16-byte alignment
+BLAKE3_IV: ; the four 32-bit IV words, loaded together as one xmmword
+BLAKE3_IV_0: ; BLAKE3_IV_0..3 label the individual words
+ dd 06A09E667H
+BLAKE3_IV_1:
+ dd 0BB67AE85H
+BLAKE3_IV_2:
+ dd 03C6EF372H
+BLAKE3_IV_3:
+ dd 0A54FF53AH
+
+_RDATA ENDS
+END
#endif
#endif
+#define MAYBE_UNUSED(x) (void)((x)) /* Evaluates x and discards the result. Used as a statement right after `features` is read so the variable counts as used even when every `#if !defined(BLAKE3_NO_...)` branch that tests it is compiled out. */
+
#if defined(IS_X86)
static uint64_t xgetbv() {
#if defined(_MSC_VER)
uint8_t flags) {
#if defined(IS_X86)
const enum cpu_feature features = get_cpu_features();
+ MAYBE_UNUSED(features);
#if !defined(BLAKE3_NO_AVX512)
if (features & AVX512VL) {
blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
uint8_t out[64]) {
#if defined(IS_X86)
const enum cpu_feature features = get_cpu_features();
+ MAYBE_UNUSED(features);
#if !defined(BLAKE3_NO_AVX512)
if (features & AVX512VL) {
blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
#if defined(IS_X86)
const enum cpu_feature features = get_cpu_features();
+ MAYBE_UNUSED(features);
#if !defined(BLAKE3_NO_AVX512)
if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
size_t blake3_simd_degree(void) {
#if defined(IS_X86)
const enum cpu_feature features = get_cpu_features();
+ MAYBE_UNUSED(features);
#if !defined(BLAKE3_NO_AVX512)
if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
return 16;
movzx r8d, r8b
shl rax, 32
add r8, rax
- movq xmm3, r9
- movq xmm4, r8
+ movd xmm3, r9
+ movd xmm4, r8
punpcklqdq xmm3, xmm4
movups xmm4, xmmword ptr [rdx]
movups xmm5, xmmword ptr [rdx+10H]
mov r10, qword ptr [rsp+0A8H]
shl rax, 32
add r8, rax
- movq xmm3, r9
- movq xmm4, r8
+ movd xmm3, r9
+ movd xmm4, r8
punpcklqdq xmm3, xmm4
movups xmm4, xmmword ptr [rdx]
movups xmm5, xmmword ptr [rdx+10H]
--- /dev/null
+public _blake3_hash_many_sse41
+public blake3_hash_many_sse41
+public blake3_compress_in_place_sse41
+public _blake3_compress_in_place_sse41
+public blake3_compress_xof_sse41
+public _blake3_compress_xof_sse41
+
+_TEXT SEGMENT ALIGN(16) 'CODE'
+
+ALIGN 16
+blake3_hash_many_sse41 PROC
+_blake3_hash_many_sse41 PROC
+ push r15
+ push r14
+ push r13
+ push r12
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 528
+ and rsp, 0FFFFFFFFFFFFFFC0H
+ movdqa xmmword ptr [rsp+170H], xmm6
+ movdqa xmmword ptr [rsp+180H], xmm7
+ movdqa xmmword ptr [rsp+190H], xmm8
+ movdqa xmmword ptr [rsp+1A0H], xmm9
+ movdqa xmmword ptr [rsp+1B0H], xmm10
+ movdqa xmmword ptr [rsp+1C0H], xmm11
+ movdqa xmmword ptr [rsp+1D0H], xmm12
+ movdqa xmmword ptr [rsp+1E0H], xmm13
+ movdqa xmmword ptr [rsp+1F0H], xmm14
+ movdqa xmmword ptr [rsp+200H], xmm15
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, qword ptr [rbp+68H]
+ movzx r9, byte ptr [rbp+70H]
+ neg r9d
+ movd xmm0, r9d
+ pshufd xmm0, xmm0, 00H
+ movdqa xmmword ptr [rsp+130H], xmm0
+ movdqa xmm1, xmm0
+ pand xmm1, xmmword ptr [ADD0]
+ pand xmm0, xmmword ptr [ADD1]
+ movdqa xmmword ptr [rsp+150H], xmm0
+ movd xmm0, r8d
+ pshufd xmm0, xmm0, 00H
+ paddd xmm0, xmm1
+ movdqa xmmword ptr [rsp+110H], xmm0
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK]
+ pcmpgtd xmm1, xmm0
+ shr r8, 32
+ movd xmm2, r8d
+ pshufd xmm2, xmm2, 00H
+ psubd xmm2, xmm1
+ movdqa xmmword ptr [rsp+120H], xmm2
+ mov rbx, qword ptr [rbp+90H]
+ mov r15, rdx
+ shl r15, 6
+ movzx r13d, byte ptr [rbp+78H]
+ movzx r12d, byte ptr [rbp+88H]
+ cmp rsi, 4
+ jc final3blocks
+outerloop4:
+ movdqu xmm3, xmmword ptr [rcx]
+ pshufd xmm0, xmm3, 00H
+ pshufd xmm1, xmm3, 55H
+ pshufd xmm2, xmm3, 0AAH
+ pshufd xmm3, xmm3, 0FFH
+ movdqu xmm7, xmmword ptr [rcx+10H]
+ pshufd xmm4, xmm7, 00H
+ pshufd xmm5, xmm7, 55H
+ pshufd xmm6, xmm7, 0AAH
+ pshufd xmm7, xmm7, 0FFH
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+innerloop4:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movdqu xmm8, xmmword ptr [r8+rdx-40H]
+ movdqu xmm9, xmmword ptr [r9+rdx-40H]
+ movdqu xmm10, xmmword ptr [r10+rdx-40H]
+ movdqu xmm11, xmmword ptr [r11+rdx-40H]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp], xmm8
+ movdqa xmmword ptr [rsp+10H], xmm9
+ movdqa xmmword ptr [rsp+20H], xmm12
+ movdqa xmmword ptr [rsp+30H], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-30H]
+ movdqu xmm9, xmmword ptr [r9+rdx-30H]
+ movdqu xmm10, xmmword ptr [r10+rdx-30H]
+ movdqu xmm11, xmmword ptr [r11+rdx-30H]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+40H], xmm8
+ movdqa xmmword ptr [rsp+50H], xmm9
+ movdqa xmmword ptr [rsp+60H], xmm12
+ movdqa xmmword ptr [rsp+70H], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-20H]
+ movdqu xmm9, xmmword ptr [r9+rdx-20H]
+ movdqu xmm10, xmmword ptr [r10+rdx-20H]
+ movdqu xmm11, xmmword ptr [r11+rdx-20H]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+80H], xmm8
+ movdqa xmmword ptr [rsp+90H], xmm9
+ movdqa xmmword ptr [rsp+0A0H], xmm12
+ movdqa xmmword ptr [rsp+0B0H], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-10H]
+ movdqu xmm9, xmmword ptr [r9+rdx-10H]
+ movdqu xmm10, xmmword ptr [r10+rdx-10H]
+ movdqu xmm11, xmmword ptr [r11+rdx-10H]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0C0H], xmm8
+ movdqa xmmword ptr [rsp+0D0H], xmm9
+ movdqa xmmword ptr [rsp+0E0H], xmm12
+ movdqa xmmword ptr [rsp+0F0H], xmm13
+ movdqa xmm9, xmmword ptr [BLAKE3_IV_1]
+ movdqa xmm10, xmmword ptr [BLAKE3_IV_2]
+ movdqa xmm11, xmmword ptr [BLAKE3_IV_3]
+ movdqa xmm12, xmmword ptr [rsp+110H]
+ movdqa xmm13, xmmword ptr [rsp+120H]
+ movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN]
+ movd xmm15, eax
+ pshufd xmm15, xmm15, 00H
+ prefetcht0 byte ptr [r8+rdx+80H]
+ prefetcht0 byte ptr [r9+rdx+80H]
+ prefetcht0 byte ptr [r10+rdx+80H]
+ prefetcht0 byte ptr [r11+rdx+80H]
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+20H]
+ paddd xmm2, xmmword ptr [rsp+40H]
+ paddd xmm3, xmmword ptr [rsp+60H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [BLAKE3_IV_0]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+10H]
+ paddd xmm1, xmmword ptr [rsp+30H]
+ paddd xmm2, xmmword ptr [rsp+50H]
+ paddd xmm3, xmmword ptr [rsp+70H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+80H]
+ paddd xmm1, xmmword ptr [rsp+0A0H]
+ paddd xmm2, xmmword ptr [rsp+0C0H]
+ paddd xmm3, xmmword ptr [rsp+0E0H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+90H]
+ paddd xmm1, xmmword ptr [rsp+0B0H]
+ paddd xmm2, xmmword ptr [rsp+0D0H]
+ paddd xmm3, xmmword ptr [rsp+0F0H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+20H]
+ paddd xmm1, xmmword ptr [rsp+30H]
+ paddd xmm2, xmmword ptr [rsp+70H]
+ paddd xmm3, xmmword ptr [rsp+40H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+60H]
+ paddd xmm1, xmmword ptr [rsp+0A0H]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0D0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+10H]
+ paddd xmm1, xmmword ptr [rsp+0C0H]
+ paddd xmm2, xmmword ptr [rsp+90H]
+ paddd xmm3, xmmword ptr [rsp+0F0H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0B0H]
+ paddd xmm1, xmmword ptr [rsp+50H]
+ paddd xmm2, xmmword ptr [rsp+0E0H]
+ paddd xmm3, xmmword ptr [rsp+80H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+30H]
+ paddd xmm1, xmmword ptr [rsp+0A0H]
+ paddd xmm2, xmmword ptr [rsp+0D0H]
+ paddd xmm3, xmmword ptr [rsp+70H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+40H]
+ paddd xmm1, xmmword ptr [rsp+0C0H]
+ paddd xmm2, xmmword ptr [rsp+20H]
+ paddd xmm3, xmmword ptr [rsp+0E0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+60H]
+ paddd xmm1, xmmword ptr [rsp+90H]
+ paddd xmm2, xmmword ptr [rsp+0B0H]
+ paddd xmm3, xmmword ptr [rsp+80H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+50H]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0F0H]
+ paddd xmm3, xmmword ptr [rsp+10H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0A0H]
+ paddd xmm1, xmmword ptr [rsp+0C0H]
+ paddd xmm2, xmmword ptr [rsp+0E0H]
+ paddd xmm3, xmmword ptr [rsp+0D0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+70H]
+ paddd xmm1, xmmword ptr [rsp+90H]
+ paddd xmm2, xmmword ptr [rsp+30H]
+ paddd xmm3, xmmword ptr [rsp+0F0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+40H]
+ paddd xmm1, xmmword ptr [rsp+0B0H]
+ paddd xmm2, xmmword ptr [rsp+50H]
+ paddd xmm3, xmmword ptr [rsp+10H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+20H]
+ paddd xmm2, xmmword ptr [rsp+80H]
+ paddd xmm3, xmmword ptr [rsp+60H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0C0H]
+ paddd xmm1, xmmword ptr [rsp+90H]
+ paddd xmm2, xmmword ptr [rsp+0F0H]
+ paddd xmm3, xmmword ptr [rsp+0E0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0D0H]
+ paddd xmm1, xmmword ptr [rsp+0B0H]
+ paddd xmm2, xmmword ptr [rsp+0A0H]
+ paddd xmm3, xmmword ptr [rsp+80H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+70H]
+ paddd xmm1, xmmword ptr [rsp+50H]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+60H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+20H]
+ paddd xmm1, xmmword ptr [rsp+30H]
+ paddd xmm2, xmmword ptr [rsp+10H]
+ paddd xmm3, xmmword ptr [rsp+40H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+90H]
+ paddd xmm1, xmmword ptr [rsp+0B0H]
+ paddd xmm2, xmmword ptr [rsp+80H]
+ paddd xmm3, xmmword ptr [rsp+0F0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0E0H]
+ paddd xmm1, xmmword ptr [rsp+50H]
+ paddd xmm2, xmmword ptr [rsp+0C0H]
+ paddd xmm3, xmmword ptr [rsp+10H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0D0H]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+20H]
+ paddd xmm3, xmmword ptr [rsp+40H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+30H]
+ paddd xmm1, xmmword ptr [rsp+0A0H]
+ paddd xmm2, xmmword ptr [rsp+60H]
+ paddd xmm3, xmmword ptr [rsp+70H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0B0H]
+ paddd xmm1, xmmword ptr [rsp+50H]
+ paddd xmm2, xmmword ptr [rsp+10H]
+ paddd xmm3, xmmword ptr [rsp+80H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0F0H]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+90H]
+ paddd xmm3, xmmword ptr [rsp+60H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0E0H]
+ paddd xmm1, xmmword ptr [rsp+20H]
+ paddd xmm2, xmmword ptr [rsp+30H]
+ paddd xmm3, xmmword ptr [rsp+70H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0A0H]
+ paddd xmm1, xmmword ptr [rsp+0C0H]
+ paddd xmm2, xmmword ptr [rsp+40H]
+ paddd xmm3, xmmword ptr [rsp+0D0H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ pxor xmm0, xmm8
+ pxor xmm1, xmm9
+ pxor xmm2, xmm10
+ pxor xmm3, xmm11
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ pxor xmm4, xmm12
+ pxor xmm5, xmm13
+ pxor xmm6, xmm14
+ pxor xmm7, xmm15
+ mov eax, r13d
+ jne innerloop4
+ movdqa xmm9, xmm0
+ punpckldq xmm0, xmm1
+ punpckhdq xmm9, xmm1
+ movdqa xmm11, xmm2
+ punpckldq xmm2, xmm3
+ punpckhdq xmm11, xmm3
+ movdqa xmm1, xmm0
+ punpcklqdq xmm0, xmm2
+ punpckhqdq xmm1, xmm2
+ movdqa xmm3, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm3, xmm11
+ movdqu xmmword ptr [rbx], xmm0
+ movdqu xmmword ptr [rbx+20H], xmm1
+ movdqu xmmword ptr [rbx+40H], xmm9
+ movdqu xmmword ptr [rbx+60H], xmm3
+ movdqa xmm9, xmm4
+ punpckldq xmm4, xmm5
+ punpckhdq xmm9, xmm5
+ movdqa xmm11, xmm6
+ punpckldq xmm6, xmm7
+ punpckhdq xmm11, xmm7
+ movdqa xmm5, xmm4
+ punpcklqdq xmm4, xmm6
+ punpckhqdq xmm5, xmm6
+ movdqa xmm7, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm7, xmm11
+ movdqu xmmword ptr [rbx+10H], xmm4
+ movdqu xmmword ptr [rbx+30H], xmm5
+ movdqu xmmword ptr [rbx+50H], xmm9
+ movdqu xmmword ptr [rbx+70H], xmm7
+ movdqa xmm1, xmmword ptr [rsp+110H]
+ movdqa xmm0, xmm1
+ paddd xmm1, xmmword ptr [rsp+150H]
+ movdqa xmmword ptr [rsp+110H], xmm1
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK]
+ pcmpgtd xmm0, xmm1
+ movdqa xmm1, xmmword ptr [rsp+120H]
+ psubd xmm1, xmm0
+ movdqa xmmword ptr [rsp+120H], xmm1
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+ cmp rsi, 4
+ jnc outerloop4
+ test rsi, rsi
+ jne final3blocks
+unwind:
+ movdqa xmm6, xmmword ptr [rsp+170H]
+ movdqa xmm7, xmmword ptr [rsp+180H]
+ movdqa xmm8, xmmword ptr [rsp+190H]
+ movdqa xmm9, xmmword ptr [rsp+1A0H]
+ movdqa xmm10, xmmword ptr [rsp+1B0H]
+ movdqa xmm11, xmmword ptr [rsp+1C0H]
+ movdqa xmm12, xmmword ptr [rsp+1D0H]
+ movdqa xmm13, xmmword ptr [rsp+1E0H]
+ movdqa xmm14, xmmword ptr [rsp+1F0H]
+ movdqa xmm15, xmmword ptr [rsp+200H]
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+ALIGN 16
+final3blocks:
+ test esi, 2H
+ je final1block
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+10H]
+ movaps xmm8, xmm0
+ movaps xmm9, xmm1
+ movd xmm13, dword ptr [rsp+110H]
+ pinsrd xmm13, dword ptr [rsp+120H], 1
+ pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2
+ movaps xmmword ptr [rsp], xmm13
+ movd xmm14, dword ptr [rsp+114H]
+ pinsrd xmm14, dword ptr [rsp+124H], 1
+ pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2
+ movaps xmmword ptr [rsp+10H], xmm14
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+innerloop2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV]
+ movaps xmm10, xmm2
+ movups xmm4, xmmword ptr [r8+rdx-40H]
+ movups xmm5, xmmword ptr [r8+rdx-30H]
+ movaps xmm3, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm3, xmm5, 221
+ movaps xmm5, xmm3
+ movups xmm6, xmmword ptr [r8+rdx-20H]
+ movups xmm7, xmmword ptr [r8+rdx-10H]
+ movaps xmm3, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 93H
+ shufps xmm3, xmm7, 221
+ pshufd xmm7, xmm3, 93H
+ movups xmm12, xmmword ptr [r9+rdx-40H]
+ movups xmm13, xmmword ptr [r9+rdx-30H]
+ movaps xmm11, xmm12
+ shufps xmm12, xmm13, 136
+ shufps xmm11, xmm13, 221
+ movaps xmm13, xmm11
+ movups xmm14, xmmword ptr [r9+rdx-20H]
+ movups xmm15, xmmword ptr [r9+rdx-10H]
+ movaps xmm11, xmm14
+ shufps xmm14, xmm15, 136
+ pshufd xmm14, xmm14, 93H
+ shufps xmm11, xmm15, 221
+ pshufd xmm15, xmm11, 93H
+ movaps xmm3, xmmword ptr [rsp]
+ movaps xmm11, xmmword ptr [rsp+10H]
+ pinsrd xmm3, eax, 3
+ pinsrd xmm11, eax, 3
+ mov al, 7
+roundloop2:
+ paddd xmm0, xmm4
+ paddd xmm8, xmm12
+ movaps xmmword ptr [rsp+20H], xmm4
+ movaps xmmword ptr [rsp+30H], xmm12
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movaps xmm12, xmmword ptr [ROT16]
+ pshufb xmm3, xmm12
+ pshufb xmm11, xmm12
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm5
+ paddd xmm8, xmm13
+ movaps xmmword ptr [rsp+40H], xmm5
+ movaps xmmword ptr [rsp+50H], xmm13
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movaps xmm13, xmmword ptr [ROT8]
+ pshufb xmm3, xmm13
+ pshufb xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 93H
+ pshufd xmm8, xmm8, 93H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm11, xmm11, 4EH
+ pshufd xmm2, xmm2, 39H
+ pshufd xmm10, xmm10, 39H
+ paddd xmm0, xmm6
+ paddd xmm8, xmm14
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshufb xmm3, xmm12
+ pshufb xmm11, xmm12
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm7
+ paddd xmm8, xmm15
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshufb xmm3, xmm13
+ pshufb xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 39H
+ pshufd xmm8, xmm8, 39H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm11, xmm11, 4EH
+ pshufd xmm2, xmm2, 93H
+ pshufd xmm10, xmm10, 93H
+ dec al
+ je endroundloop2
+ movdqa xmm12, xmmword ptr [rsp+20H]
+ movdqa xmm5, xmmword ptr [rsp+40H]
+ pshufd xmm13, xmm12, 0FH
+ shufps xmm12, xmm5, 214
+ pshufd xmm4, xmm12, 39H
+ movdqa xmm12, xmm6
+ shufps xmm12, xmm7, 250
+ pblendw xmm13, xmm12, 0CCH
+ movdqa xmm12, xmm7
+ punpcklqdq xmm12, xmm5
+ pblendw xmm12, xmm6, 0C0H
+ pshufd xmm12, xmm12, 78H
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 1EH
+ movdqa xmmword ptr [rsp+20H], xmm13
+ movdqa xmmword ptr [rsp+40H], xmm12
+ movdqa xmm5, xmmword ptr [rsp+30H]
+ movdqa xmm13, xmmword ptr [rsp+50H]
+ pshufd xmm6, xmm5, 0FH
+ shufps xmm5, xmm13, 214
+ pshufd xmm12, xmm5, 39H
+ movdqa xmm5, xmm14
+ shufps xmm5, xmm15, 250
+ pblendw xmm6, xmm5, 0CCH
+ movdqa xmm5, xmm15
+ punpcklqdq xmm5, xmm13
+ pblendw xmm5, xmm14, 0C0H
+ pshufd xmm5, xmm5, 78H
+ punpckhdq xmm13, xmm15
+ punpckldq xmm14, xmm13
+ pshufd xmm15, xmm14, 1EH
+ movdqa xmm13, xmm6
+ movdqa xmm14, xmm5
+ movdqa xmm5, xmmword ptr [rsp+20H]
+ movdqa xmm6, xmmword ptr [rsp+40H]
+ jmp roundloop2
+endroundloop2:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm8, xmm10
+ pxor xmm9, xmm11
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop2
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+10H], xmm1
+ movups xmmword ptr [rbx+20H], xmm8
+ movups xmmword ptr [rbx+30H], xmm9
+ movdqa xmm0, xmmword ptr [rsp+130H]
+ movdqa xmm1, xmmword ptr [rsp+110H]
+ movdqa xmm2, xmmword ptr [rsp+120H]
+ movdqu xmm3, xmmword ptr [rsp+118H]
+ movdqu xmm4, xmmword ptr [rsp+128H]
+ blendvps xmm1, xmm3, xmm0
+ blendvps xmm2, xmm4, xmm0
+ movdqa xmmword ptr [rsp+110H], xmm1
+ movdqa xmmword ptr [rsp+120H], xmm2
+ add rdi, 16
+ add rbx, 64
+ sub rsi, 2
+final1block:
+ test esi, 1H
+ je unwind
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+10H]
+ movd xmm13, dword ptr [rsp+110H]
+ pinsrd xmm13, dword ptr [rsp+120H], 1
+ pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2
+ movaps xmm14, xmmword ptr [ROT8]
+ movaps xmm15, xmmword ptr [ROT16]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+innerloop1:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV]
+ movaps xmm3, xmm13
+ pinsrd xmm3, eax, 3
+ movups xmm4, xmmword ptr [r8+rdx-40H]
+ movups xmm5, xmmword ptr [r8+rdx-30H]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [r8+rdx-20H]
+ movups xmm7, xmmword ptr [r8+rdx-10H]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 93H
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 93H
+ mov al, 7
+roundloop1:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 93H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 39H
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 39H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 93H
+ dec al
+ jz endroundloop1
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0FH
+ pshufd xmm4, xmm8, 39H
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0CCH
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0C0H
+ pshufd xmm8, xmm8, 78H
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 1EH
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp roundloop1
+endroundloop1:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop1
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+10H], xmm1
+ jmp unwind
+_blake3_hash_many_sse41 ENDP
+blake3_hash_many_sse41 ENDP
+
+blake3_compress_in_place_sse41 PROC  ; Runs 7 rounds over the 64-byte block at rdx and overwrites the 32 bytes at rcx with the result.
+_blake3_compress_in_place_sse41 PROC  ; underscore-prefixed name for the same entry point (no code between the two PROC lines)
+ sub rsp, 120  ; 7 x 16-byte XMM save slots + 8; NOTE(review): the extra 8 presumably brings rsp to 16-byte alignment for the movdqa spills - confirm
+ movdqa xmmword ptr [rsp], xmm6  ; spill every xmm6-xmm15 register this routine overwrites (callee-saved in the Microsoft x64 convention)
+ movdqa xmmword ptr [rsp+10H], xmm7
+ movdqa xmmword ptr [rsp+20H], xmm8
+ movdqa xmmword ptr [rsp+30H], xmm9
+ movdqa xmmword ptr [rsp+40H], xmm11
+ movdqa xmmword ptr [rsp+50H], xmm14
+ movdqa xmmword ptr [rsp+60H], xmm15
+ movups xmm0, xmmword ptr [rcx]  ; state row 0 = first 16 bytes at rcx (presumably the chaining value)
+ movups xmm1, xmmword ptr [rcx+10H]  ; state row 1 = next 16 bytes at rcx
+ movaps xmm2, xmmword ptr [BLAKE3_IV]  ; state row 2 = the four IV constants
+ movzx eax, byte ptr [rsp+0A0H]  ; byte argument on the stack at entry-rsp+28H (0A0H minus the 120 reserved above); presumably the flags - confirm against the C prototype
+ movzx r8d, r8b  ; keep only the low byte of r8 (presumably block_len)
+ shl rax, 32  ; move the stack byte into the upper dword
+ add r8, rax  ; r8 = low-byte value in bits 0-31, stack byte in bits 32-39
+ movd xmm3, r9  ; low qword of row 3 = r9 (presumably the 64-bit counter)
+ movd xmm4, r8
+ punpcklqdq xmm3, xmm4  ; state row 3 = { r9 low, r9 high, r8 low, r8 high }
+ movups xmm4, xmmword ptr [rdx]  ; message words m0..m3
+ movups xmm5, xmmword ptr [rdx+10H]  ; message words m4..m7
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136  ; xmm4 = even words m0, m2, m4, m6
+ shufps xmm8, xmm5, 221  ; xmm8 = odd words m1, m3, m5, m7
+ movaps xmm5, xmm8  ; xmm5 = m1, m3, m5, m7
+ movups xmm6, xmmword ptr [rdx+20H]  ; message words m8..m11
+ movups xmm7, xmmword ptr [rdx+30H]  ; message words m12..m15
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136  ; m8, m10, m12, m14
+ pshufd xmm6, xmm6, 93H  ; rotate lanes: xmm6 = m14, m8, m10, m12 (order expected by the diagonal step)
+ shufps xmm8, xmm7, 221  ; m9, m11, m13, m15
+ pshufd xmm7, xmm8, 93H  ; rotate lanes: xmm7 = m15, m9, m11, m13
+ movaps xmm14, xmmword ptr [ROT8]  ; pshufb mask: rotate each dword right by 8
+ movaps xmm15, xmmword ptr [ROT16]  ; pshufb mask: rotate each dword by 16
+ mov al, 7  ; round counter
+@@:  ; top of the round loop
+ paddd xmm0, xmm4  ; column step, first half: row0 += message vector xmm4
+ paddd xmm0, xmm1  ; row0 += row1
+ pxor xmm3, xmm0  ; row3 ^= row0
+ pshufb xmm3, xmm15  ; row3 = rotate by 16
+ paddd xmm2, xmm3  ; row2 += row3
+ pxor xmm1, xmm2  ; row1 ^= row2
+ movdqa xmm11, xmm1  ; xmm11 is scratch for the shift-based rotate
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11  ; row1 = rotate right by 12
+ paddd xmm0, xmm5  ; column step, second half: row0 += message vector xmm5
+ paddd xmm0, xmm1  ; row0 += row1
+ pxor xmm3, xmm0  ; row3 ^= row0
+ pshufb xmm3, xmm14  ; row3 = rotate right by 8
+ paddd xmm2, xmm3  ; row2 += row3
+ pxor xmm1, xmm2  ; row1 ^= row2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11  ; row1 = rotate right by 7
+ pshufd xmm0, xmm0, 93H  ; diagonalize: rotate rows 0, 3 and 2 (row 1 stays) so each diagonal shares a lane
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 39H
+ paddd xmm0, xmm6  ; diagonal step, first half: row0 += message vector xmm6
+ paddd xmm0, xmm1  ; row0 += row1
+ pxor xmm3, xmm0  ; row3 ^= row0
+ pshufb xmm3, xmm15  ; row3 = rotate by 16
+ paddd xmm2, xmm3  ; row2 += row3
+ pxor xmm1, xmm2  ; row1 ^= row2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11  ; row1 = rotate right by 12
+ paddd xmm0, xmm7  ; diagonal step, second half: row0 += message vector xmm7
+ paddd xmm0, xmm1  ; row0 += row1
+ pxor xmm3, xmm0  ; row3 ^= row0
+ pshufb xmm3, xmm14  ; row3 = rotate right by 8
+ paddd xmm2, xmm3  ; row2 += row3
+ pxor xmm1, xmm2  ; row1 ^= row2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11  ; row1 = rotate right by 7
+ pshufd xmm0, xmm0, 39H  ; undo the diagonalization (inverse lane rotations)
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 93H
+ dec al
+ jz @F  ; all 7 rounds done: skip the message re-ordering and finish
+ movdqa xmm8, xmm4  ; re-order the message words for the next round (xmm8/xmm9 are scratch); presumably the BLAKE3 message permutation - confirm
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0FH
+ pshufd xmm4, xmm8, 39H  ; xmm4 = next round's first message vector
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0CCH
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0C0H
+ pshufd xmm8, xmm8, 78H
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 1EH  ; xmm7 = next round's fourth message vector
+ movdqa xmm5, xmm9  ; xmm5 = next round's second message vector
+ movdqa xmm6, xmm8  ; xmm6 = next round's third message vector
+ jmp @B
+@@:  ; rounds finished
+ pxor xmm0, xmm2  ; fold the state: row0 ^= row2
+ pxor xmm1, xmm3  ; row1 ^= row3
+ movups xmmword ptr [rcx], xmm0  ; write the 32-byte result back over the input at rcx
+ movups xmmword ptr [rcx+10H], xmm1
+ movdqa xmm6, xmmword ptr [rsp]  ; restore the spilled XMM registers
+ movdqa xmm7, xmmword ptr [rsp+10H]
+ movdqa xmm8, xmmword ptr [rsp+20H]
+ movdqa xmm9, xmmword ptr [rsp+30H]
+ movdqa xmm11, xmmword ptr [rsp+40H]
+ movdqa xmm14, xmmword ptr [rsp+50H]
+ movdqa xmm15, xmmword ptr [rsp+60H]
+ add rsp, 120  ; release the save area
+ ret
+_blake3_compress_in_place_sse41 ENDP
+blake3_compress_in_place_sse41 ENDP
+
+ALIGN 16
+blake3_compress_xof_sse41 PROC  ; Runs 7 rounds over the 64-byte block at rdx with the 32 bytes at rcx as input, and writes a 64-byte result to the pointer taken from the stack; rcx is only read.
+_blake3_compress_xof_sse41 PROC  ; underscore-prefixed name for the same entry point (no code between the two PROC lines)
+ sub rsp, 120  ; 7 x 16-byte XMM save slots + 8; NOTE(review): the extra 8 presumably brings rsp to 16-byte alignment for the movdqa spills - confirm
+ movdqa xmmword ptr [rsp], xmm6  ; spill every xmm6-xmm15 register this routine overwrites (callee-saved in the Microsoft x64 convention)
+ movdqa xmmword ptr [rsp+10H], xmm7
+ movdqa xmmword ptr [rsp+20H], xmm8
+ movdqa xmmword ptr [rsp+30H], xmm9
+ movdqa xmmword ptr [rsp+40H], xmm11
+ movdqa xmmword ptr [rsp+50H], xmm14
+ movdqa xmmword ptr [rsp+60H], xmm15
+ movups xmm0, xmmword ptr [rcx]  ; state row 0 = first 16 bytes at rcx (presumably the chaining value)
+ movups xmm1, xmmword ptr [rcx+10H]  ; state row 1 = next 16 bytes at rcx
+ movaps xmm2, xmmword ptr [BLAKE3_IV]  ; state row 2 = the four IV constants
+ movzx eax, byte ptr [rsp+0A0H]  ; byte argument on the stack at entry-rsp+28H (0A0H minus the 120 reserved above); presumably the flags - confirm against the C prototype
+ movzx r8d, r8b  ; keep only the low byte of r8 (presumably block_len)
+ mov r10, qword ptr [rsp+0A8H]  ; pointer argument on the stack at entry-rsp+30H; the 64-byte result is stored through it below
+ shl rax, 32  ; move the stack byte into the upper dword
+ add r8, rax  ; r8 = low-byte value in bits 0-31, stack byte in bits 32-39
+ movd xmm3, r9  ; low qword of row 3 = r9 (presumably the 64-bit counter)
+ movd xmm4, r8
+ punpcklqdq xmm3, xmm4  ; state row 3 = { r9 low, r9 high, r8 low, r8 high }
+ movups xmm4, xmmword ptr [rdx]  ; message words m0..m3
+ movups xmm5, xmmword ptr [rdx+10H]  ; message words m4..m7
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136  ; xmm4 = even words m0, m2, m4, m6
+ shufps xmm8, xmm5, 221  ; xmm8 = odd words m1, m3, m5, m7
+ movaps xmm5, xmm8  ; xmm5 = m1, m3, m5, m7
+ movups xmm6, xmmword ptr [rdx+20H]  ; message words m8..m11
+ movups xmm7, xmmword ptr [rdx+30H]  ; message words m12..m15
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136  ; m8, m10, m12, m14
+ pshufd xmm6, xmm6, 93H  ; rotate lanes: xmm6 = m14, m8, m10, m12 (order expected by the diagonal step)
+ shufps xmm8, xmm7, 221  ; m9, m11, m13, m15
+ pshufd xmm7, xmm8, 93H  ; rotate lanes: xmm7 = m15, m9, m11, m13
+ movaps xmm14, xmmword ptr [ROT8]  ; pshufb mask: rotate each dword right by 8
+ movaps xmm15, xmmword ptr [ROT16]  ; pshufb mask: rotate each dword by 16
+ mov al, 7  ; round counter
+@@:  ; top of the round loop
+ paddd xmm0, xmm4  ; column step, first half: row0 += message vector xmm4
+ paddd xmm0, xmm1  ; row0 += row1
+ pxor xmm3, xmm0  ; row3 ^= row0
+ pshufb xmm3, xmm15  ; row3 = rotate by 16
+ paddd xmm2, xmm3  ; row2 += row3
+ pxor xmm1, xmm2  ; row1 ^= row2
+ movdqa xmm11, xmm1  ; xmm11 is scratch for the shift-based rotate
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11  ; row1 = rotate right by 12
+ paddd xmm0, xmm5  ; column step, second half: row0 += message vector xmm5
+ paddd xmm0, xmm1  ; row0 += row1
+ pxor xmm3, xmm0  ; row3 ^= row0
+ pshufb xmm3, xmm14  ; row3 = rotate right by 8
+ paddd xmm2, xmm3  ; row2 += row3
+ pxor xmm1, xmm2  ; row1 ^= row2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11  ; row1 = rotate right by 7
+ pshufd xmm0, xmm0, 93H  ; diagonalize: rotate rows 0, 3 and 2 (row 1 stays) so each diagonal shares a lane
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 39H
+ paddd xmm0, xmm6  ; diagonal step, first half: row0 += message vector xmm6
+ paddd xmm0, xmm1  ; row0 += row1
+ pxor xmm3, xmm0  ; row3 ^= row0
+ pshufb xmm3, xmm15  ; row3 = rotate by 16
+ paddd xmm2, xmm3  ; row2 += row3
+ pxor xmm1, xmm2  ; row1 ^= row2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11  ; row1 = rotate right by 12
+ paddd xmm0, xmm7  ; diagonal step, second half: row0 += message vector xmm7
+ paddd xmm0, xmm1  ; row0 += row1
+ pxor xmm3, xmm0  ; row3 ^= row0
+ pshufb xmm3, xmm14  ; row3 = rotate right by 8
+ paddd xmm2, xmm3  ; row2 += row3
+ pxor xmm1, xmm2  ; row1 ^= row2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11  ; row1 = rotate right by 7
+ pshufd xmm0, xmm0, 39H  ; undo the diagonalization (inverse lane rotations)
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 93H
+ dec al
+ jz @F  ; all 7 rounds done: skip the message re-ordering and finish
+ movdqa xmm8, xmm4  ; re-order the message words for the next round (xmm8/xmm9 are scratch); presumably the BLAKE3 message permutation - confirm
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0FH
+ pshufd xmm4, xmm8, 39H  ; xmm4 = next round's first message vector
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0CCH
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0C0H
+ pshufd xmm8, xmm8, 78H
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 1EH  ; xmm7 = next round's fourth message vector
+ movdqa xmm5, xmm9  ; xmm5 = next round's second message vector
+ movdqa xmm6, xmm8  ; xmm6 = next round's third message vector
+ jmp @B
+@@:  ; rounds finished
+ movdqu xmm4, xmmword ptr [rcx]  ; reload the original 32 input bytes at rcx (the message registers are free now)
+ movdqu xmm5, xmmword ptr [rcx+10H]
+ pxor xmm0, xmm2  ; output bytes 0-15: row0 ^ row2
+ pxor xmm1, xmm3  ; output bytes 16-31: row1 ^ row3
+ pxor xmm2, xmm4  ; output bytes 32-47: row2 ^ original input bytes 0-15
+ pxor xmm3, xmm5  ; output bytes 48-63: row3 ^ original input bytes 16-31
+ movups xmmword ptr [r10], xmm0  ; store all 64 output bytes through r10
+ movups xmmword ptr [r10+10H], xmm1
+ movups xmmword ptr [r10+20H], xmm2
+ movups xmmword ptr [r10+30H], xmm3
+ movdqa xmm6, xmmword ptr [rsp]  ; restore the spilled XMM registers
+ movdqa xmm7, xmmword ptr [rsp+10H]
+ movdqa xmm8, xmmword ptr [rsp+20H]
+ movdqa xmm9, xmmword ptr [rsp+30H]
+ movdqa xmm11, xmmword ptr [rsp+40H]
+ movdqa xmm14, xmmword ptr [rsp+50H]
+ movdqa xmm15, xmmword ptr [rsp+60H]
+ add rsp, 120  ; release the save area
+ ret
+_blake3_compress_xof_sse41 ENDP
+blake3_compress_xof_sse41 ENDP
+
+_TEXT ENDS
+
+
+_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST'  ; read-only constants shared by the SSE4.1 routines in this file
+ALIGN 64  ; every table below is a multiple of 16 bytes, so each label stays 16-byte aligned for movaps/movdqa and memory-operand loads
+BLAKE3_IV:
+ dd 6A09E667H, 0BB67AE85H, 3C6EF372H, 0A54FF53AH  ; four IV words, loaded as state row 2 by the compress routines
+
+ADD0:
+ dd 0, 1, 2, 3  ; NOTE(review): not referenced in the visible part of the file; presumably per-lane counter offsets for the 4-way path - confirm
+
+ADD1:
+ dd 4 dup (4)  ; NOTE(review): not referenced in the visible part of the file; presumably the counter increment per 4-input batch - confirm
+
+BLAKE3_IV_0:
+ dd 4 dup (6A09E667H)  ; first IV word replicated across all four dword lanes
+
+BLAKE3_IV_1:
+ dd 4 dup (0BB67AE85H)  ; second IV word replicated across all four dword lanes
+
+BLAKE3_IV_2:
+ dd 4 dup (3C6EF372H)  ; third IV word replicated across all four dword lanes
+
+BLAKE3_IV_3:
+ dd 4 dup (0A54FF53AH)  ; fourth IV word replicated across all four dword lanes
+
+BLAKE3_BLOCK_LEN:
+ dd 4 dup (64)  ; the value 64 in every lane; the hash_many tail paths pinsrd its first dword into state row 3
+
+ROT16:
+ db 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13  ; pshufb mask: swaps the 16-bit halves of each dword (rotate by 16)
+
+ROT8:
+ db 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12  ; pshufb mask: rotates each dword right by 8 bits
+
+CMP_MSB_MASK:
+ dd 8 dup(80000000H)  ; sign-bit mask XORed into both operands before pcmpgtd so the signed compare orders them as unsigned; 8 dwords are defined although the xmm accesses visible here read only 16 bytes
+
+_RDATA ENDS
+END
+
#define DOCTEST_VERSION_MAJOR 2
#define DOCTEST_VERSION_MINOR 4
-#define DOCTEST_VERSION_PATCH 1
-#define DOCTEST_VERSION_STR "2.4.1"
+#define DOCTEST_VERSION_PATCH 4
+#define DOCTEST_VERSION_STR "2.4.4"
#define DOCTEST_VERSION \
(DOCTEST_VERSION_MAJOR * 10000 + DOCTEST_VERSION_MINOR * 100 + DOCTEST_VERSION_PATCH)
#define DOCTEST_BREAK_INTO_DEBUGGER() raise(SIGTRAP)
#endif
#elif defined(DOCTEST_PLATFORM_MAC)
-#if defined(__x86_64) || defined(__x86_64__) || defined(__amd64__)
+#if defined(__x86_64) || defined(__x86_64__) || defined(__amd64__) || defined(__i386)
#define DOCTEST_BREAK_INTO_DEBUGGER() __asm__("int $3\n" : :)
#else
#define DOCTEST_BREAK_INTO_DEBUGGER() __asm__("brk #0");
bool gnu_file_line; // if line numbers should be surrounded with :x: and not (x):
bool no_path_in_filenames; // if the path to files should be removed from the output
bool no_line_numbers; // if source code line numbers should be omitted from the output
+ bool no_debug_output; // no output in the debug console when a debugger is attached
bool no_skipped_summary; // don't print "skipped" in the summary !!! UNDOCUMENTED !!!
bool no_time_in_output; // omit any time/timestamps from output !!! UNDOCUMENTED !!!
} // namespace has_insertion_operator_impl
template<class T>
- using has_insertion_operator = has_insertion_operator_impl::check<T>;
+ using has_insertion_operator = has_insertion_operator_impl::check<const T>;
DOCTEST_INTERFACE void my_memcpy(void* dest, const void* src, unsigned num);
template <typename L, typename R>
String stringifyBinaryExpr(const DOCTEST_REF_WRAP(L) lhs, const char* op,
const DOCTEST_REF_WRAP(R) rhs) {
+ // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
return toString(lhs) + op + toString(rhs);
}
#define DOCTEST_COMPARISON_RETURN_TYPE bool
#else // DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING
#define DOCTEST_COMPARISON_RETURN_TYPE typename enable_if<can_use_op<L>::value || can_use_op<R>::value, bool>::type
+ // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
inline bool eq(const char* lhs, const char* rhs) { return String(lhs) == String(rhs); }
inline bool ne(const char* lhs, const char* rhs) { return String(lhs) != String(rhs); }
inline bool lt(const char* lhs, const char* rhs) { return String(lhs) < String(rhs); }
MessageBuilder() = delete;
~MessageBuilder();
+ // the preferred way of chaining parameters for stringification
template <typename T>
- MessageBuilder& operator<<(const T& in) {
+ MessageBuilder& operator,(const T& in) {
toStream(m_stream, in);
return *this;
}
+ // kept here just for backwards-compatibility - the comma operator should be preferred now
+ template <typename T>
+ MessageBuilder& operator<<(const T& in) { return this->operator,(in); }
+
+ // the `,` operator has the lowest precedence of all operators - if the user chains arguments
+ // with `<<`, a leading `,` would be evaluated last, which is not what we want. The `*` operator
+ // (which has higher precedence than `<<`) is therefore applied first, so that we guarantee that
+ // an operator of the MessageBuilder class is called before the rest of the parameters
+ template <typename T>
+ MessageBuilder& operator*(const T& in) { return this->operator,(in); }
+
bool log();
void react();
};
DOCTEST_GLOBAL_NO_WARNINGS_END() typedef int DOCTEST_ANONYMOUS(_DOCTEST_ANON_FOR_SEMICOLON_)
// for logging
-#define DOCTEST_INFO(expression) \
+#define DOCTEST_INFO(...) \
DOCTEST_INFO_IMPL(DOCTEST_ANONYMOUS(_DOCTEST_CAPTURE_), DOCTEST_ANONYMOUS(_DOCTEST_CAPTURE_), \
- DOCTEST_ANONYMOUS(_DOCTEST_CAPTURE_), expression)
+ DOCTEST_ANONYMOUS(_DOCTEST_CAPTURE_), __VA_ARGS__)
-#define DOCTEST_INFO_IMPL(lambda_name, mb_name, s_name, expression) \
+#define DOCTEST_INFO_IMPL(lambda_name, mb_name, s_name, ...) \
DOCTEST_MSVC_SUPPRESS_WARNING_WITH_PUSH(4626) \
auto lambda_name = [&](std::ostream* s_name) { \
doctest::detail::MessageBuilder mb_name(__FILE__, __LINE__, doctest::assertType::is_warn); \
mb_name.m_stream = s_name; \
- mb_name << expression; \
+ mb_name * __VA_ARGS__; \
}; \
DOCTEST_MSVC_SUPPRESS_WARNING_POP \
auto DOCTEST_ANONYMOUS(_DOCTEST_CAPTURE_) = doctest::detail::MakeContextScope(lambda_name)
-#define DOCTEST_CAPTURE(x) DOCTEST_INFO(#x " := " << x)
+#define DOCTEST_CAPTURE(x) DOCTEST_INFO(#x " := ", x)
-#define DOCTEST_ADD_AT_IMPL(type, file, line, mb, x) \
+#define DOCTEST_ADD_AT_IMPL(type, file, line, mb, ...) \
do { \
doctest::detail::MessageBuilder mb(file, line, doctest::assertType::type); \
- mb << x; \
+ mb * __VA_ARGS__; \
DOCTEST_ASSERT_LOG_AND_REACT(mb); \
} while(false)
// clang-format off
-#define DOCTEST_ADD_MESSAGE_AT(file, line, x) DOCTEST_ADD_AT_IMPL(is_warn, file, line, DOCTEST_ANONYMOUS(_DOCTEST_MESSAGE_), x)
-#define DOCTEST_ADD_FAIL_CHECK_AT(file, line, x) DOCTEST_ADD_AT_IMPL(is_check, file, line, DOCTEST_ANONYMOUS(_DOCTEST_MESSAGE_), x)
-#define DOCTEST_ADD_FAIL_AT(file, line, x) DOCTEST_ADD_AT_IMPL(is_require, file, line, DOCTEST_ANONYMOUS(_DOCTEST_MESSAGE_), x)
+#define DOCTEST_ADD_MESSAGE_AT(file, line, ...) DOCTEST_ADD_AT_IMPL(is_warn, file, line, DOCTEST_ANONYMOUS(_DOCTEST_MESSAGE_), __VA_ARGS__)
+#define DOCTEST_ADD_FAIL_CHECK_AT(file, line, ...) DOCTEST_ADD_AT_IMPL(is_check, file, line, DOCTEST_ANONYMOUS(_DOCTEST_MESSAGE_), __VA_ARGS__)
+#define DOCTEST_ADD_FAIL_AT(file, line, ...) DOCTEST_ADD_AT_IMPL(is_require, file, line, DOCTEST_ANONYMOUS(_DOCTEST_MESSAGE_), __VA_ARGS__)
// clang-format on
-#define DOCTEST_MESSAGE(x) DOCTEST_ADD_MESSAGE_AT(__FILE__, __LINE__, x)
-#define DOCTEST_FAIL_CHECK(x) DOCTEST_ADD_FAIL_CHECK_AT(__FILE__, __LINE__, x)
-#define DOCTEST_FAIL(x) DOCTEST_ADD_FAIL_AT(__FILE__, __LINE__, x)
+#define DOCTEST_MESSAGE(...) DOCTEST_ADD_MESSAGE_AT(__FILE__, __LINE__, __VA_ARGS__)
+#define DOCTEST_FAIL_CHECK(...) DOCTEST_ADD_FAIL_CHECK_AT(__FILE__, __LINE__, __VA_ARGS__)
+#define DOCTEST_FAIL(...) DOCTEST_ADD_FAIL_AT(__FILE__, __LINE__, __VA_ARGS__)
#define DOCTEST_TO_LVALUE(...) __VA_ARGS__ // Not removed to keep backwards compatibility.
#define DOCTEST_REQUIRE_FALSE(...) DOCTEST_ASSERT_IMPLEMENT_1(DT_REQUIRE_FALSE, __VA_ARGS__)
// clang-format off
-#define DOCTEST_WARN_MESSAGE(cond, msg) do { DOCTEST_INFO(msg); DOCTEST_ASSERT_IMPLEMENT_2(DT_WARN, cond); } while(false)
-#define DOCTEST_CHECK_MESSAGE(cond, msg) do { DOCTEST_INFO(msg); DOCTEST_ASSERT_IMPLEMENT_2(DT_CHECK, cond); } while(false)
-#define DOCTEST_REQUIRE_MESSAGE(cond, msg) do { DOCTEST_INFO(msg); DOCTEST_ASSERT_IMPLEMENT_2(DT_REQUIRE, cond); } while(false)
-#define DOCTEST_WARN_FALSE_MESSAGE(cond, msg) do { DOCTEST_INFO(msg); DOCTEST_ASSERT_IMPLEMENT_2(DT_WARN_FALSE, cond); } while(false)
-#define DOCTEST_CHECK_FALSE_MESSAGE(cond, msg) do { DOCTEST_INFO(msg); DOCTEST_ASSERT_IMPLEMENT_2(DT_CHECK_FALSE, cond); } while(false)
-#define DOCTEST_REQUIRE_FALSE_MESSAGE(cond, msg) do { DOCTEST_INFO(msg); DOCTEST_ASSERT_IMPLEMENT_2(DT_REQUIRE_FALSE, cond); } while(false)
+#define DOCTEST_WARN_MESSAGE(cond, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_ASSERT_IMPLEMENT_2(DT_WARN, cond); } while(false)
+#define DOCTEST_CHECK_MESSAGE(cond, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_ASSERT_IMPLEMENT_2(DT_CHECK, cond); } while(false)
+#define DOCTEST_REQUIRE_MESSAGE(cond, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_ASSERT_IMPLEMENT_2(DT_REQUIRE, cond); } while(false)
+#define DOCTEST_WARN_FALSE_MESSAGE(cond, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_ASSERT_IMPLEMENT_2(DT_WARN_FALSE, cond); } while(false)
+#define DOCTEST_CHECK_FALSE_MESSAGE(cond, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_ASSERT_IMPLEMENT_2(DT_CHECK_FALSE, cond); } while(false)
+#define DOCTEST_REQUIRE_FALSE_MESSAGE(cond, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_ASSERT_IMPLEMENT_2(DT_REQUIRE_FALSE, cond); } while(false)
// clang-format on
#define DOCTEST_ASSERT_THROWS_AS(expr, assert_type, message, ...) \
__LINE__, #expr, #__VA_ARGS__, message); \
try { \
DOCTEST_CAST_TO_VOID(expr) \
- } catch(const doctest::detail::remove_const< \
- doctest::detail::remove_reference<__VA_ARGS__>::type>::type&) { \
+ } catch(const typename doctest::detail::remove_const< \
+ typename doctest::detail::remove_reference<__VA_ARGS__>::type>::type&) { \
_DOCTEST_RB.translateException(); \
_DOCTEST_RB.m_threw_as = true; \
} catch(...) { _DOCTEST_RB.translateException(); } \
#define DOCTEST_CHECK_NOTHROW(...) DOCTEST_ASSERT_NOTHROW(DT_CHECK_NOTHROW, __VA_ARGS__)
#define DOCTEST_REQUIRE_NOTHROW(...) DOCTEST_ASSERT_NOTHROW(DT_REQUIRE_NOTHROW, __VA_ARGS__)
-#define DOCTEST_WARN_THROWS_MESSAGE(expr, msg) do { DOCTEST_INFO(msg); DOCTEST_WARN_THROWS(expr); } while(false)
-#define DOCTEST_CHECK_THROWS_MESSAGE(expr, msg) do { DOCTEST_INFO(msg); DOCTEST_CHECK_THROWS(expr); } while(false)
-#define DOCTEST_REQUIRE_THROWS_MESSAGE(expr, msg) do { DOCTEST_INFO(msg); DOCTEST_REQUIRE_THROWS(expr); } while(false)
-#define DOCTEST_WARN_THROWS_AS_MESSAGE(expr, ex, msg) do { DOCTEST_INFO(msg); DOCTEST_WARN_THROWS_AS(expr, ex); } while(false)
-#define DOCTEST_CHECK_THROWS_AS_MESSAGE(expr, ex, msg) do { DOCTEST_INFO(msg); DOCTEST_CHECK_THROWS_AS(expr, ex); } while(false)
-#define DOCTEST_REQUIRE_THROWS_AS_MESSAGE(expr, ex, msg) do { DOCTEST_INFO(msg); DOCTEST_REQUIRE_THROWS_AS(expr, ex); } while(false)
-#define DOCTEST_WARN_THROWS_WITH_MESSAGE(expr, with, msg) do { DOCTEST_INFO(msg); DOCTEST_WARN_THROWS_WITH(expr, with); } while(false)
-#define DOCTEST_CHECK_THROWS_WITH_MESSAGE(expr, with, msg) do { DOCTEST_INFO(msg); DOCTEST_CHECK_THROWS_WITH(expr, with); } while(false)
-#define DOCTEST_REQUIRE_THROWS_WITH_MESSAGE(expr, with, msg) do { DOCTEST_INFO(msg); DOCTEST_REQUIRE_THROWS_WITH(expr, with); } while(false)
-#define DOCTEST_WARN_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) do { DOCTEST_INFO(msg); DOCTEST_WARN_THROWS_WITH_AS(expr, with, ex); } while(false)
-#define DOCTEST_CHECK_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) do { DOCTEST_INFO(msg); DOCTEST_CHECK_THROWS_WITH_AS(expr, with, ex); } while(false)
-#define DOCTEST_REQUIRE_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) do { DOCTEST_INFO(msg); DOCTEST_REQUIRE_THROWS_WITH_AS(expr, with, ex); } while(false)
-#define DOCTEST_WARN_NOTHROW_MESSAGE(expr, msg) do { DOCTEST_INFO(msg); DOCTEST_WARN_NOTHROW(expr); } while(false)
-#define DOCTEST_CHECK_NOTHROW_MESSAGE(expr, msg) do { DOCTEST_INFO(msg); DOCTEST_CHECK_NOTHROW(expr); } while(false)
-#define DOCTEST_REQUIRE_NOTHROW_MESSAGE(expr, msg) do { DOCTEST_INFO(msg); DOCTEST_REQUIRE_NOTHROW(expr); } while(false)
+#define DOCTEST_WARN_THROWS_MESSAGE(expr, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_WARN_THROWS(expr); } while(false)
+#define DOCTEST_CHECK_THROWS_MESSAGE(expr, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_CHECK_THROWS(expr); } while(false)
+#define DOCTEST_REQUIRE_THROWS_MESSAGE(expr, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_REQUIRE_THROWS(expr); } while(false)
+#define DOCTEST_WARN_THROWS_AS_MESSAGE(expr, ex, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_WARN_THROWS_AS(expr, ex); } while(false)
+#define DOCTEST_CHECK_THROWS_AS_MESSAGE(expr, ex, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_CHECK_THROWS_AS(expr, ex); } while(false)
+#define DOCTEST_REQUIRE_THROWS_AS_MESSAGE(expr, ex, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_REQUIRE_THROWS_AS(expr, ex); } while(false)
+#define DOCTEST_WARN_THROWS_WITH_MESSAGE(expr, with, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_WARN_THROWS_WITH(expr, with); } while(false)
+#define DOCTEST_CHECK_THROWS_WITH_MESSAGE(expr, with, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_CHECK_THROWS_WITH(expr, with); } while(false)
+#define DOCTEST_REQUIRE_THROWS_WITH_MESSAGE(expr, with, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_REQUIRE_THROWS_WITH(expr, with); } while(false)
+#define DOCTEST_WARN_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_WARN_THROWS_WITH_AS(expr, with, ex); } while(false)
+#define DOCTEST_CHECK_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_CHECK_THROWS_WITH_AS(expr, with, ex); } while(false)
+#define DOCTEST_REQUIRE_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_REQUIRE_THROWS_WITH_AS(expr, with, ex); } while(false)
+#define DOCTEST_WARN_NOTHROW_MESSAGE(expr, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_WARN_NOTHROW(expr); } while(false)
+#define DOCTEST_CHECK_NOTHROW_MESSAGE(expr, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_CHECK_NOTHROW(expr); } while(false)
+#define DOCTEST_REQUIRE_NOTHROW_MESSAGE(expr, ...) do { DOCTEST_INFO(__VA_ARGS__); DOCTEST_REQUIRE_NOTHROW(expr); } while(false)
// clang-format on
#ifndef DOCTEST_CONFIG_SUPER_FAST_ASSERTS
#define DOCTEST_CHECK_NOTHROW(...) (static_cast<void>(0))
#define DOCTEST_REQUIRE_NOTHROW(...) (static_cast<void>(0))
-#define DOCTEST_WARN_THROWS_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_THROWS_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_THROWS_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_WARN_THROWS_AS_MESSAGE(expr, ex, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_THROWS_AS_MESSAGE(expr, ex, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_THROWS_AS_MESSAGE(expr, ex, msg) (static_cast<void>(0))
-#define DOCTEST_WARN_THROWS_WITH_MESSAGE(expr, with, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_THROWS_WITH_MESSAGE(expr, with, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_THROWS_WITH_MESSAGE(expr, with, msg) (static_cast<void>(0))
-#define DOCTEST_WARN_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) (static_cast<void>(0))
-#define DOCTEST_WARN_NOTHROW_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_NOTHROW_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_NOTHROW_MESSAGE(expr, msg) (static_cast<void>(0))
+#define DOCTEST_WARN_THROWS_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_THROWS_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_THROWS_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_WARN_THROWS_AS_MESSAGE(expr, ex, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_THROWS_AS_MESSAGE(expr, ex, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_THROWS_AS_MESSAGE(expr, ex, ...) (static_cast<void>(0))
+#define DOCTEST_WARN_THROWS_WITH_MESSAGE(expr, with, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_THROWS_WITH_MESSAGE(expr, with, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_THROWS_WITH_MESSAGE(expr, with, ...) (static_cast<void>(0))
+#define DOCTEST_WARN_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) (static_cast<void>(0))
+#define DOCTEST_WARN_NOTHROW_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_NOTHROW_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_NOTHROW_MESSAGE(expr, ...) (static_cast<void>(0))
#else // DOCTEST_CONFIG_NO_EXCEPTIONS_BUT_WITH_ALL_ASSERTS
#define DOCTEST_REGISTER_REPORTER(name, priority, reporter)
#define DOCTEST_REGISTER_LISTENER(name, priority, reporter)
-#define DOCTEST_INFO(x) (static_cast<void>(0))
+#define DOCTEST_INFO(...) (static_cast<void>(0))
#define DOCTEST_CAPTURE(x) (static_cast<void>(0))
-#define DOCTEST_ADD_MESSAGE_AT(file, line, x) (static_cast<void>(0))
-#define DOCTEST_ADD_FAIL_CHECK_AT(file, line, x) (static_cast<void>(0))
-#define DOCTEST_ADD_FAIL_AT(file, line, x) (static_cast<void>(0))
-#define DOCTEST_MESSAGE(x) (static_cast<void>(0))
-#define DOCTEST_FAIL_CHECK(x) (static_cast<void>(0))
-#define DOCTEST_FAIL(x) (static_cast<void>(0))
+#define DOCTEST_ADD_MESSAGE_AT(file, line, ...) (static_cast<void>(0))
+#define DOCTEST_ADD_FAIL_CHECK_AT(file, line, ...) (static_cast<void>(0))
+#define DOCTEST_ADD_FAIL_AT(file, line, ...) (static_cast<void>(0))
+#define DOCTEST_MESSAGE(...) (static_cast<void>(0))
+#define DOCTEST_FAIL_CHECK(...) (static_cast<void>(0))
+#define DOCTEST_FAIL(...) (static_cast<void>(0))
#define DOCTEST_WARN(...) (static_cast<void>(0))
#define DOCTEST_CHECK(...) (static_cast<void>(0))
#define DOCTEST_CHECK_FALSE(...) (static_cast<void>(0))
#define DOCTEST_REQUIRE_FALSE(...) (static_cast<void>(0))
-#define DOCTEST_WARN_MESSAGE(cond, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_MESSAGE(cond, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_MESSAGE(cond, msg) (static_cast<void>(0))
-#define DOCTEST_WARN_FALSE_MESSAGE(cond, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_FALSE_MESSAGE(cond, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_FALSE_MESSAGE(cond, msg) (static_cast<void>(0))
+#define DOCTEST_WARN_MESSAGE(cond, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_MESSAGE(cond, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_MESSAGE(cond, ...) (static_cast<void>(0))
+#define DOCTEST_WARN_FALSE_MESSAGE(cond, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_FALSE_MESSAGE(cond, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_FALSE_MESSAGE(cond, ...) (static_cast<void>(0))
#define DOCTEST_WARN_THROWS(...) (static_cast<void>(0))
#define DOCTEST_CHECK_THROWS(...) (static_cast<void>(0))
#define DOCTEST_CHECK_NOTHROW(...) (static_cast<void>(0))
#define DOCTEST_REQUIRE_NOTHROW(...) (static_cast<void>(0))
-#define DOCTEST_WARN_THROWS_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_THROWS_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_THROWS_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_WARN_THROWS_AS_MESSAGE(expr, ex, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_THROWS_AS_MESSAGE(expr, ex, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_THROWS_AS_MESSAGE(expr, ex, msg) (static_cast<void>(0))
-#define DOCTEST_WARN_THROWS_WITH_MESSAGE(expr, with, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_THROWS_WITH_MESSAGE(expr, with, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_THROWS_WITH_MESSAGE(expr, with, msg) (static_cast<void>(0))
-#define DOCTEST_WARN_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_THROWS_WITH_AS_MESSAGE(expr, with, ex, msg) (static_cast<void>(0))
-#define DOCTEST_WARN_NOTHROW_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_CHECK_NOTHROW_MESSAGE(expr, msg) (static_cast<void>(0))
-#define DOCTEST_REQUIRE_NOTHROW_MESSAGE(expr, msg) (static_cast<void>(0))
+#define DOCTEST_WARN_THROWS_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_THROWS_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_THROWS_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_WARN_THROWS_AS_MESSAGE(expr, ex, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_THROWS_AS_MESSAGE(expr, ex, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_THROWS_AS_MESSAGE(expr, ex, ...) (static_cast<void>(0))
+#define DOCTEST_WARN_THROWS_WITH_MESSAGE(expr, with, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_THROWS_WITH_MESSAGE(expr, with, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_THROWS_WITH_MESSAGE(expr, with, ...) (static_cast<void>(0))
+#define DOCTEST_WARN_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_THROWS_WITH_AS_MESSAGE(expr, with, ex, ...) (static_cast<void>(0))
+#define DOCTEST_WARN_NOTHROW_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_CHECK_NOTHROW_MESSAGE(expr, ...) (static_cast<void>(0))
+#define DOCTEST_REQUIRE_NOTHROW_MESSAGE(expr, ...) (static_cast<void>(0))
#define DOCTEST_WARN_EQ(...) (static_cast<void>(0))
#define DOCTEST_CHECK_EQ(...) (static_cast<void>(0))
#include <map>
#include <exception>
#include <stdexcept>
-#ifdef DOCTEST_CONFIG_POSIX_SIGNALS
#include <csignal>
-#endif // DOCTEST_CONFIG_POSIX_SIGNALS
#include <cfloat>
#include <cctype>
#include <cstdint>
String::~String() {
if(!isOnStack())
delete[] data.ptr;
+ // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
}
String::String(const char* in)
if(total_size < len) {
// append to the current stack space
memcpy(buf + my_old_size, other.c_str(), other_size + 1);
+ // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
setLast(last - total_size);
} else {
// alloc new chunk
return *this;
}
+// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
String String::operator+(const String& other) const { return String(*this) += other; }
String::String(String&& other) {
DOCTEST_GCC_SUPPRESS_WARNING_WITH_PUSH("-Wnull-dereference")
// depending on the current options this will remove the path of filenames
const char* skipPathFromFilename(const char* file) {
+#ifndef DOCTEST_CONFIG_DISABLE
if(getContextOptions()->no_path_in_filenames) {
auto back = std::strrchr(file, '\\');
auto forward = std::strrchr(file, '/');
return forward + 1;
}
}
+#endif // DOCTEST_CONFIG_DISABLE
return file;
}
DOCTEST_CLANG_SUPPRESS_WARNING_POP
#ifdef DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING
String toString(char* in) { return toString(static_cast<const char*>(in)); }
+// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
String toString(const char* in) { return String("\"") + (in ? in : "{null string}") + "\""; }
#endif // DOCTEST_CONFIG_TREAT_CHAR_STAR_AS_STRING
String toString(bool in) { return in ? "true" : "false"; }
bool operator>(const Approx& lhs, double rhs) { return lhs.m_value > rhs && lhs != rhs; }
String toString(const Approx& in) {
+ // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
return String("Approx( ") + doctest::toString(in.m_value) + " )";
}
const ContextOptions* getContextOptions() { return DOCTEST_BRANCH_ON_DISABLED(nullptr, g_cs); }
}
bool TestCase::operator<(const TestCase& other) const {
+ // this will be used only to differentiate between test cases - not relevant for sorting; compares line, then file, then name, then template id
if(m_line != other.m_line)
return m_line < other.m_line;
const int file_cmp = m_file.compare(other.m_file);
if(file_cmp != 0)
return file_cmp < 0;
+ const int name_cmp = strcmp(m_name, other.m_name); // tie-break on the name when line and file are equal
+ if(name_cmp != 0)
+ return name_cmp < 0;
return m_template_id < other.m_template_id;
}
} // namespace detail
// Windows can easily distinguish between SO and SigSegV,
// but SigInt, SigTerm, etc are handled differently.
SignalDefs signalDefs[] = {
- {EXCEPTION_ILLEGAL_INSTRUCTION, "SIGILL - Illegal instruction signal"},
- {EXCEPTION_STACK_OVERFLOW, "SIGSEGV - Stack overflow"},
- {EXCEPTION_ACCESS_VIOLATION, "SIGSEGV - Segmentation violation signal"},
- {EXCEPTION_INT_DIVIDE_BY_ZERO, "Divide by zero error"},
+ {static_cast<DWORD>(EXCEPTION_ILLEGAL_INSTRUCTION),
+ "SIGILL - Illegal instruction signal"},
+ {static_cast<DWORD>(EXCEPTION_STACK_OVERFLOW), "SIGSEGV - Stack overflow"},
+ {static_cast<DWORD>(EXCEPTION_ACCESS_VIOLATION),
+ "SIGSEGV - Segmentation violation signal"},
+ {static_cast<DWORD>(EXCEPTION_INT_DIVIDE_BY_ZERO), "Divide by zero error"},
};
struct FatalConditionHandler
{
static LONG CALLBACK handleException(PEXCEPTION_POINTERS ExceptionInfo) {
- for(size_t i = 0; i < DOCTEST_COUNTOF(signalDefs); ++i) {
- if(ExceptionInfo->ExceptionRecord->ExceptionCode == signalDefs[i].id) {
- reportFatal(signalDefs[i].name);
- break;
+ // Multiple threads may enter this filter/handler at once. We want the error message to be printed on the
+ // console just once no matter how many threads have crashed.
+ static std::mutex mutex;
+ static bool execute = true; // true until the first thread has reported; only touched while `mutex` is held
+ {
+ std::lock_guard<std::mutex> lock(mutex);
+ if(execute) {
+ bool reported = false;
+ for(size_t i = 0; i < DOCTEST_COUNTOF(signalDefs); ++i) { // look the exception code up to get a descriptive name
+ if(ExceptionInfo->ExceptionRecord->ExceptionCode == signalDefs[i].id) {
+ reportFatal(signalDefs[i].name);
+ reported = true;
+ break;
+ }
+ }
+ if(reported == false) // code is not listed in signalDefs - fall back to a generic message
+ reportFatal("Unhandled SEH exception caught");
+ if(isDebuggerActive() && !g_cs->no_breaks)
+ DOCTEST_BREAK_INTO_DEBUGGER();
}
+ execute = false; // threads that enter later skip the reporting above
}
- // If its not an exception we care about, pass it along.
- // This stops us from eating debugger breaks etc.
- return EXCEPTION_CONTINUE_SEARCH;
+ std::exit(EXIT_FAILURE); // runs after the lock is released; the handler never returns a filter result
}
FatalConditionHandler() {
previousTop = SetUnhandledExceptionFilter(handleException);
// Pass in guarantee size to be filled
SetThreadStackGuarantee(&guaranteeSize);
+
+ // On Windows, uncaught exceptions from another thread, exceptions from
+ // destructors, and calls to std::terminate are not SEH exceptions
+
+ // The terminate handler gets called when:
+ // - std::terminate is called FROM THE TEST RUNNER THREAD
+ // - an exception is thrown from a destructor FROM THE TEST RUNNER THREAD
+ original_terminate_handler = std::get_terminate();
+ std::set_terminate([]() noexcept {
+ reportFatal("Terminate handler called");
+ if(isDebuggerActive() && !g_cs->no_breaks)
+ DOCTEST_BREAK_INTO_DEBUGGER();
+ std::exit(EXIT_FAILURE); // explicitly exit - otherwise the SIGABRT handler may be called as well
+ });
+
+ // SIGABRT is raised when:
+ // - std::terminate is called FROM A DIFFERENT THREAD
+ // - an exception is thrown from a destructor FROM A DIFFERENT THREAD
+ // - an uncaught exception is thrown FROM A DIFFERENT THREAD
+ prev_sigabrt_handler = std::signal(SIGABRT, [](int signal) noexcept {
+ if(signal == SIGABRT) {
+ reportFatal("SIGABRT - Abort (abnormal termination) signal");
+ if(isDebuggerActive() && !g_cs->no_breaks)
+ DOCTEST_BREAK_INTO_DEBUGGER();
+ std::exit(EXIT_FAILURE);
+ }
+ });
+
+ // The following settings are taken from google test, and more
+ // specifically from UnitTest::Run() inside of gtest.cc
+
+ // the user does not want to see pop-up dialogs about crashes
+ prev_error_mode_1 = SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOALIGNMENTFAULTEXCEPT |
+ SEM_NOGPFAULTERRORBOX | SEM_NOOPENFILEERRORBOX);
+ // This forces the abort message to go to stderr in all circumstances.
+ prev_error_mode_2 = _set_error_mode(_OUT_TO_STDERR);
+ // In the debug version, Visual Studio pops up a separate dialog
+ // offering a choice to debug the aborted program - we want to disable that.
+ prev_abort_behavior = _set_abort_behavior(0x0, _WRITE_ABORT_MSG | _CALL_REPORTFAULT);
+ // In debug mode, the Windows CRT can crash with an assertion over invalid
+ // input (e.g. passing an invalid file descriptor). The default handling
+ // for these assertions is to pop up a dialog and wait for user input.
+ // Instead ask the CRT to dump such assertions to stderr non-interactively.
+ prev_report_mode = _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
+ prev_report_file = _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
}
static void reset() {
// Unregister handler and restore the old guarantee
SetUnhandledExceptionFilter(previousTop);
SetThreadStackGuarantee(&guaranteeSize);
- previousTop = nullptr;
+ std::set_terminate(original_terminate_handler);
+ std::signal(SIGABRT, prev_sigabrt_handler);
+ SetErrorMode(prev_error_mode_1);
+ _set_error_mode(prev_error_mode_2);
+ _set_abort_behavior(prev_abort_behavior, _WRITE_ABORT_MSG | _CALL_REPORTFAULT);
+ _CrtSetReportMode(_CRT_ASSERT, prev_report_mode);
+ _CrtSetReportFile(_CRT_ASSERT, prev_report_file);
isSet = false;
}
}
~FatalConditionHandler() { reset(); }
private:
+ static UINT prev_error_mode_1;
+ static int prev_error_mode_2;
+ static unsigned int prev_abort_behavior;
+ static int prev_report_mode;
+ static _HFILE prev_report_file;
+ static void (*prev_sigabrt_handler)(int);
+ static std::terminate_handler original_terminate_handler;
static bool isSet;
static ULONG guaranteeSize;
static LPTOP_LEVEL_EXCEPTION_FILTER previousTop;
};
+ UINT FatalConditionHandler::prev_error_mode_1;
+ int FatalConditionHandler::prev_error_mode_2;
+ unsigned int FatalConditionHandler::prev_abort_behavior;
+ int FatalConditionHandler::prev_report_mode;
+ _HFILE FatalConditionHandler::prev_report_file;
+ void (*FatalConditionHandler::prev_sigabrt_handler)(int);
+ std::terminate_handler FatalConditionHandler::original_terminate_handler;
bool FatalConditionHandler::isSet = false;
ULONG FatalConditionHandler::guaranteeSize = 0;
LPTOP_LEVEL_EXCEPTION_FILTER FatalConditionHandler::previousTop = nullptr;
// ###################################################################################
DOCTEST_ASSERT_OUT_OF_TESTS(result.m_decomp);
DOCTEST_ASSERT_IN_TESTS(result.m_decomp);
+ // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
}
MessageBuilder::MessageBuilder(const char* file, int line, assertType::Enum severity) {
}
// TODO:
- // - log_contexts()
// - log_message()
// - respond to queries
// - honor remaining options
struct JUnitTestCaseData
{
-DOCTEST_CLANG_SUPPRESS_WARNING_WITH_PUSH("-Wdeprecated-declarations") // gmtime
static std::string getCurrentTimestamp() {
// Uses the reentrant gmtime_r/gmtime_s instead of std::gmtime, so no shared static buffer is involved
// UTC only, because of backward compatibility (%z is C++11)
std::time(&rawtime);
auto const timeStampSize = sizeof("2017-01-16T17:06:45Z");
- std::tm* timeInfo;
- timeInfo = std::gmtime(&rawtime);
+ std::tm timeInfo;
+#ifdef DOCTEST_PLATFORM_WINDOWS
+ gmtime_s(&timeInfo, &rawtime);
+#else // DOCTEST_PLATFORM_WINDOWS
+ gmtime_r(&rawtime, &timeInfo);
+#endif // DOCTEST_PLATFORM_WINDOWS
char timeStamp[timeStampSize];
const char* const fmt = "%Y-%m-%dT%H:%M:%SZ";
- std::strftime(timeStamp, timeStampSize, fmt, timeInfo);
+ std::strftime(timeStamp, timeStampSize, fmt, &timeInfo);
return std::string(timeStamp);
}
-DOCTEST_CLANG_SUPPRESS_WARNING_POP
struct JUnitTestMessage
{
<< line(rb.m_line) << (opt.gnu_file_line ? ":" : "):") << std::endl;
fulltext_log_assert_to_stream(os, rb);
+ log_contexts(os);
testCaseData.addFailure(rb.m_decomp.c_str(), assertString(rb.m_at), os.str());
}
void log_message(const MessageData&) override {}
void test_case_skipped(const TestCaseData&) override {}
+
+ void log_contexts(std::ostringstream& s) { // appends the currently active contexts to the stream `s`
+ int num_contexts = get_num_active_contexts();
+ if(num_contexts) { // nothing at all is written when no context is active
+ auto contexts = get_active_contexts();
+
+ s << " logged: "; // header, emitted once before the first context
+ for(int i = 0; i < num_contexts; ++i) {
+ s << (i == 0 ? "" : " "); // the first entry follows the header directly, later ones get a leading separator
+ contexts[i]->stringify(&s); // the context writes its own text into `s`
+ s << std::endl; // one context per line
+ }
+ }
+ }
};
DOCTEST_REGISTER_REPORTER("junit", 0, JUnitReporter);
DOCTEST_PARSE_AS_BOOL_OR_FLAG("gnu-file-line", "gfl", gnu_file_line, !bool(DOCTEST_MSVC));
DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-path-filenames", "npf", no_path_in_filenames, false);
DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-line-numbers", "nln", no_line_numbers, false);
+ DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-debug-output", "ndo", no_debug_output, false);
DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-skipped-summary", "nss", no_skipped_summary, false);
DOCTEST_PARSE_AS_BOOL_OR_FLAG("no-time-in-output", "ntio", no_time_in_output, false);
// clang-format on
// allows the user to override procedurally the int/bool options from the command line
void Context::setOption(const char* option, int value) {
setOption(option, toString(value).c_str());
+ // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
}
// allows the user to override procedurally the string options from the command line
p->reporters_currently_used.insert(p->reporters_currently_used.begin(), curr.second(*g_cs));
#ifdef DOCTEST_PLATFORM_WINDOWS
- if(isDebuggerActive())
+ if(isDebuggerActive() && p->no_debug_output == false)
p->reporters_currently_used.push_back(new DebugOutputWindowReporter(*g_cs));
#endif // DOCTEST_PLATFORM_WINDOWS
#include <vector>
// The fmt library version in the form major * 10000 + minor * 100 + patch.
-#define FMT_VERSION 70003
+#define FMT_VERSION 70103
#ifdef __clang__
# define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__)
# define FMT_MSC_VER 0
# define FMT_SUPPRESS_MSC_WARNING(n)
#endif
+
#ifdef __has_feature
# define FMT_HAS_FEATURE(x) __has_feature(x)
#else
#endif
#if defined(__has_include) && !defined(__INTELLISENSE__) && \
- !(FMT_ICC_VERSION && FMT_ICC_VERSION < 1600)
+ (!FMT_ICC_VERSION || FMT_ICC_VERSION >= 1600)
# define FMT_HAS_INCLUDE(x) __has_include(x)
#else
# define FMT_HAS_INCLUDE(x) 0
# if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VER >= 1900
# define FMT_DEPRECATED [[deprecated]]
# else
-# if defined(__GNUC__) || defined(__clang__)
+# if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__)
# define FMT_DEPRECATED __attribute__((deprecated))
# elif FMT_MSC_VER
# define FMT_DEPRECATED __declspec(deprecated)
# endif
#endif
-#ifndef FMT_BEGIN_NAMESPACE
+#ifndef FMT_USE_INLINE_NAMESPACES
# if FMT_HAS_FEATURE(cxx_inline_namespaces) || FMT_GCC_VERSION >= 404 || \
- FMT_MSC_VER >= 1900
+ (FMT_MSC_VER >= 1900 && !_MANAGED)
+# define FMT_USE_INLINE_NAMESPACES 1
+# else
+# define FMT_USE_INLINE_NAMESPACES 0
+# endif
+#endif
+
+#ifndef FMT_BEGIN_NAMESPACE
+# if FMT_USE_INLINE_NAMESPACES
# define FMT_INLINE_NAMESPACE inline namespace
# define FMT_END_NAMESPACE \
} \
namespace detail {
-// A helper function to suppress bogus "conditional expression is constant"
-// warnings.
+// A helper function to suppress "conditional expression is constant" warnings.
template <typename T> constexpr T const_check(T value) { return value; }
FMT_NORETURN FMT_API void assert_fail(const char* file, int line,
#ifdef FMT_USE_INT128
// Do nothing.
-#elif defined(__SIZEOF_INT128__) && !FMT_NVCC && !(FMT_CLANG_VERSION && FMT_MSC_VER)
+#elif defined(__SIZEOF_INT128__) && !FMT_NVCC && \
+ !(FMT_CLANG_VERSION && FMT_MSC_VER)
# define FMT_USE_INT128 1
using int128_t = __int128_t;
using uint128_t = __uint128_t;
using type = typename result::value_type;
};
+// Reports a compile-time error if S is not a valid format string.
+template <typename..., typename S, FMT_ENABLE_IF(!is_compile_string<S>::value)>
+FMT_INLINE void check_format_string(const S&) {
+#ifdef FMT_ENFORCE_COMPILE_STRING
+ static_assert(is_compile_string<S>::value,
+ "FMT_ENFORCE_COMPILE_STRING requires all format strings to use "
+ "FMT_STRING.");
+#endif
+}
+template <typename..., typename S, FMT_ENABLE_IF(is_compile_string<S>::value)>
+void check_format_string(S);
+
struct error_handler {
constexpr error_handler() = default;
constexpr error_handler(const error_handler&) = default;
using iterator = typename basic_string_view<Char>::iterator;
explicit constexpr basic_format_parse_context(
- basic_string_view<Char> format_str, ErrorHandler eh = {})
- : ErrorHandler(eh), format_str_(format_str), next_arg_id_(0) {}
+ basic_string_view<Char> format_str, ErrorHandler eh = {},
+ int next_arg_id = 0)
+ : ErrorHandler(eh), format_str_(format_str), next_arg_id_(next_arg_id) {}
/**
Returns an iterator to the beginning of the format string range being
using has_formatter =
std::is_constructible<typename Context::template formatter_type<T>>;
+// Checks whether T is a container with contiguous storage.
+template <typename T> struct is_contiguous : std::false_type {};
+template <typename Char>
+struct is_contiguous<std::basic_string<Char>> : std::true_type {};
+
namespace detail {
+// Extracts a reference to the container from back_insert_iterator.
+template <typename Container>
+inline Container& get_container(std::back_insert_iterator<Container> it) {
+ using bi_iterator = std::back_insert_iterator<Container>;
+ struct accessor : bi_iterator {  // local subclass used only to reach `container`
+ accessor(bi_iterator iter) : bi_iterator(iter) {}
+ using bi_iterator::container;  // makes the inherited member nameable here
+ };
+ return *accessor(it).container;  // `container` is dereferenced to get the reference
+}
+
/**
\rst
A contiguous memory buffer with an optional growing ability. It is an internal
size_(sz),
capacity_(cap) {}
+ ~buffer() = default;
+
/** Sets the buffer data and capacity. */
void set(T* buf_data, size_t buf_capacity) FMT_NOEXCEPT {
ptr_ = buf_data;
buffer(const buffer&) = delete;
void operator=(const buffer&) = delete;
- virtual ~buffer() = default;
T* begin() FMT_NOEXCEPT { return ptr_; }
T* end() FMT_NOEXCEPT { return ptr_ + size_; }
/** Returns a pointer to the buffer data. */
const T* data() const FMT_NOEXCEPT { return ptr_; }
- /**
- Resizes the buffer. If T is a POD type new elements may not be initialized.
- */
- void resize(size_t new_size) {
- reserve(new_size);
- size_ = new_size;
- }
-
/** Clears this buffer. */
void clear() { size_ = 0; }
- /** Reserves space to store at least *capacity* elements. */
- void reserve(size_t new_capacity) {
+ // Tries resizing the buffer to contain *count* elements. If T is a POD type
+ // the new elements may not be initialized.
+ void try_resize(size_t count) {
+ try_reserve(count);  // may leave capacity_ below *count*
+ size_ = count <= capacity_ ? count : capacity_;  // clamp the size to capacity_
+ }
+
+ // Tries increasing the buffer capacity to *new_capacity*. It can increase the
+ // capacity by a smaller amount than requested but guarantees there is space
+ // for at least one additional element either by increasing the capacity or by
+ // flushing the buffer if it is full.
+ void try_reserve(size_t new_capacity) {
if (new_capacity > capacity_) grow(new_capacity);
}
void push_back(const T& value) {
- reserve(size_ + 1);
+ try_reserve(size_ + 1);
ptr_[size_++] = value;
}
}
};
-// A container-backed buffer.
+struct buffer_traits {  // default Traits of iterator_buffer: no limit, no counting
+ explicit buffer_traits(size_t) {}  // the size argument is ignored
+ size_t count() const { return 0; }
+ size_t limit(size_t size) { return size; }  // passes *size* through unchanged
+};
+
+class fixed_buffer_traits {  // traits that cap output at a fixed element count
+ private:
+ size_t count_ = 0;  // sum of all sizes passed to limit() so far
+ size_t limit_;  // cap supplied to the constructor
+
+ public:
+ explicit fixed_buffer_traits(size_t limit) : limit_(limit) {}
+ size_t count() const { return count_; }
+ size_t limit(size_t size) {  // returns how many of *size* elements still fit
+ size_t n = limit_ > count_ ? limit_ - count_ : 0;  // room left, never negative
+ count_ += size;  // counted in full even when the result is truncated
+ return size < n ? size : n;
+ }
+};
+
+// A buffer that writes to an output iterator when flushed.
+template <typename OutputIt, typename T, typename Traits = buffer_traits>
+class iterator_buffer final : public Traits, public buffer<T> {
+ private:
+ OutputIt out_;
+ enum { buffer_size = 256 };
+ T data_[buffer_size];  // fixed internal storage handed to buffer<T>
+
+ protected:
+ void grow(size_t) final FMT_OVERRIDE {  // the requested capacity is ignored
+ if (this->size() == buffer_size) flush();  // flush only once data_ is full
+ }
+ void flush();  // declared here; the definition is not part of this hunk
+
+ public:
+ explicit iterator_buffer(OutputIt out, size_t n = buffer_size)
+ : Traits(n),
+ buffer<T>(data_, 0, buffer_size),
+ out_(out) {}
+ ~iterator_buffer() { flush(); }  // flushes on destruction
+
+ OutputIt out() {  // flushes first, then returns the stored iterator
+ flush();
+ return out_;
+ }
+ size_t count() const { return Traits::count() + this->size(); }
+};
+
+template <typename T> class iterator_buffer<T*, T> final : public buffer<T> {  // T* output
+ protected:
+ void grow(size_t) final FMT_OVERRIDE {}  // no-op; capacity is set to ~size_t() below
+
+ public:
+ explicit iterator_buffer(T* out, size_t = 0) : buffer<T>(out, 0, ~size_t()) {}
+
+ T* out() { return &*this->end(); }  // address of end(), i.e. ptr_ + size_
+};
+
+// A buffer that writes to a container with contiguous storage.
template <typename Container>
-class container_buffer : public buffer<typename Container::value_type> {
+class iterator_buffer<std::back_insert_iterator<Container>,
+ enable_if_t<is_contiguous<Container>::value,
+ typename Container::value_type>>
+ final : public buffer<typename Container::value_type> {
private:
Container& container_;
protected:
- void grow(size_t capacity) FMT_OVERRIDE {
+ void grow(size_t capacity) final FMT_OVERRIDE {
container_.resize(capacity);
this->set(&container_[0], capacity);
}
public:
- explicit container_buffer(Container& c)
+ explicit iterator_buffer(Container& c)
: buffer<typename Container::value_type>(c.size()), container_(c) {}
+ explicit iterator_buffer(std::back_insert_iterator<Container> out, size_t = 0)
+ : iterator_buffer(get_container(out)) {}
+ std::back_insert_iterator<Container> out() {
+ return std::back_inserter(container_);
+ }
};
-// Extracts a reference to the container from back_insert_iterator.
-template <typename Container>
-inline Container& get_container(std::back_insert_iterator<Container> it) {
- using bi_iterator = std::back_insert_iterator<Container>;
- struct accessor : bi_iterator {
- accessor(bi_iterator iter) : bi_iterator(iter) {}
- using bi_iterator::container;
- };
- return *accessor(it).container;
+// A buffer that counts the number of code units written discarding the output.
+template <typename T = char> class counting_buffer final : public buffer<T> {
+ private:
+ enum { buffer_size = 256 };
+ T data_[buffer_size];  // scratch storage that is overwritten after each reset
+ size_t count_ = 0;  // units already dropped from data_ by grow()
+
+ protected:
+ void grow(size_t) final FMT_OVERRIDE {
+ if (this->size() != buffer_size) return;  // act only when data_ is full
+ count_ += this->size();
+ this->clear();  // reset size to 0 so data_ is reused from the start
+ }
+
+ public:
+ counting_buffer() : buffer<T>(data_, 0, buffer_size) {}
+
+ size_t count() { return count_ + this->size(); }  // dropped + still buffered
+};
+
+// An output iterator that appends to the buffer.
+// It is used to reduce symbol sizes for the common case.
+template <typename T>
+class buffer_appender : public std::back_insert_iterator<buffer<T>> {
+ using base = std::back_insert_iterator<buffer<T>>;
+
+ public:
+ explicit buffer_appender(buffer<T>& buf) : base(buf) {}
+ buffer_appender(base it) : base(it) {}  // non-explicit: converts from base
+
+ buffer_appender& operator++() {  // re-declared to return buffer_appender&
+ base::operator++();
+ return *this;
+ }
+
+ buffer_appender operator++(int) {  // post-increment: returns the prior value
+ buffer_appender tmp = *this;
+ ++*this;
+ return tmp;
+ }
+};
+
+// Maps an output iterator into a buffer.
+template <typename T, typename OutputIt>
+iterator_buffer<OutputIt, T> get_buffer(OutputIt);
+template <typename T> buffer<T>& get_buffer(buffer_appender<T>);
+
+template <typename OutputIt> OutputIt get_buffer_init(OutputIt out) {  // generic case
+ return out;  // the iterator itself initializes the buffer
+}
+template <typename T> buffer<T>& get_buffer_init(buffer_appender<T> out) {
+ return get_container(out);  // the buffer<T> that the appender wraps
+}
+
+template <typename Buffer>
+auto get_iterator(Buffer& buf) -> decltype(buf.out()) {
+ return buf.out();
+}
+template <typename T> buffer_appender<T> get_iterator(buffer<T>& buf) {
+ return buffer_appender<T>(buf);
}
template <typename T, typename Char = char, typename Enable = void>
template <typename T, typename Char, size_t NUM_ARGS, size_t NUM_NAMED_ARGS>
struct arg_data {
// args_[0].named_args points to named_args_ to avoid bloating format_args.
- T args_[1 + (NUM_ARGS != 0 ? NUM_ARGS : 1)];
+ // +1 to work around a bug in gcc 7.5 that causes duplicated-branches warning.
+ T args_[1 + (NUM_ARGS != 0 ? NUM_ARGS : +1)];
named_arg_info<Char> named_args_[NUM_NAMED_ARGS];
template <typename... U>
template <typename T, typename Char, size_t NUM_ARGS>
struct arg_data<T, Char, NUM_ARGS, 0> {
- T args_[NUM_ARGS != 0 ? NUM_ARGS : 1];
+ // +1 to work around a bug in gcc 7.5 that causes duplicated-branches warning.
+ T args_[NUM_ARGS != 0 ? NUM_ARGS : +1];
template <typename... U>
FMT_INLINE arg_data(const U&... init) : args_{init...} {}
using long_type = conditional_t<long_short, int, long long>;
using ulong_type = conditional_t<long_short, unsigned, unsigned long long>;
+struct unformattable {};
+
// Maps formatting arguments to core types.
template <typename Context> struct arg_mapper {
using char_type = typename Context::char_type;
return map(val.value);
}
- int map(...) {
- constexpr bool formattable = sizeof(Context) == 0;
- static_assert(
- formattable,
- "Cannot format argument. To make type T formattable provide a "
- "formatter<T> specialization: "
- "https://fmt.dev/latest/api.html#formatting-user-defined-types");
- return 0;
- }
+ unformattable map(...) { return {}; }
};
// A type constant after applying arg_mapper<Context>.
return vis(monostate());
}
-// Checks whether T is a container with contiguous storage.
-template <typename T> struct is_contiguous : std::false_type {};
-template <typename Char>
-struct is_contiguous<std::basic_string<Char>> : std::true_type {};
-template <typename Char>
-struct is_contiguous<detail::buffer<Char>> : std::true_type {};
+template <typename T> struct formattable : std::false_type {};
namespace detail {
+// A workaround for gcc 4.8 to make void_t work in a SFINAE context.
+template <typename... Ts> struct void_t_impl { using type = void; };
+template <typename... Ts>
+using void_t = typename detail::void_t_impl<Ts...>::type;
+
+template <typename It, typename T, typename Enable = void>
+struct is_output_iterator : std::false_type {};  // fallback: not an output iterator
+
+template <typename It, typename T>
+struct is_output_iterator<
+ It, T,
+ void_t<typename std::iterator_traits<It>::iterator_category,  // It has a category
+ decltype(*std::declval<It>() = std::declval<T>())>>  // and `*it = T` is valid
+ : std::true_type {};
+
template <typename OutputIt>
struct is_back_insert_iterator : std::false_type {};
template <typename Container>
template <typename Container>
struct is_contiguous_back_insert_iterator<std::back_insert_iterator<Container>>
: is_contiguous<Container> {};
+template <typename Char>
+struct is_contiguous_back_insert_iterator<buffer_appender<Char>>
+ : std::true_type {};
// A type-erased reference to an std::locale to avoid heavy <locale> include.
class locale_ref {
return arg;
}
+template <typename T> int check(unformattable) {  // takes the tag arg_mapper::map(...) returns
+ static_assert(
+ formattable<T>(),  // the primary formattable<T> template derives from false_type
+ "Cannot format an argument. To make type T formattable provide a "
+ "formatter<T> specialization: https://fmt.dev/latest/api.html#udt");
+ return 0;
+}
+template <typename T, typename U> inline const U& check(const U& val) {
+ return val;  // any other mapped value passes through untouched
+}
+
// The type template parameter is there to avoid an ODR violation when using
// a fallback formatter in one translation unit and an implicit conversion in
// another (not recommended).
template <bool IS_PACKED, typename Context, type, typename T,
FMT_ENABLE_IF(IS_PACKED)>
inline value<Context> make_arg(const T& val) {
- return arg_mapper<Context>().map(val);
+ return check<T>(arg_mapper<Context>().map(val));
}
template <bool IS_PACKED, typename Context, type, typename T,
template <typename Char>
using buffer_context =
- basic_format_context<std::back_insert_iterator<detail::buffer<Char>>, Char>;
+ basic_format_context<detail::buffer_appender<Char>, Char>;
using format_context = buffer_context<char>;
using wformat_context = buffer_context<wchar_t>;
-// Workaround a bug in gcc: https://stackoverflow.com/q/62767544/471164.
+// Work around an alias issue: https://stackoverflow.com/q/62767544/471164.
#define FMT_BUFFER_CONTEXT(Char) \
- basic_format_context<std::back_insert_iterator<detail::buffer<Char>>, Char>
+ basic_format_context<detail::buffer_appender<Char>, Char>
/**
\rst
/**
\rst
- Constructs an `~fmt::format_arg_store` object that contains references to
+ Constructs a `~fmt::format_arg_store` object that contains references to
arguments and can be implicitly converted to `~fmt::format_args`. `Context`
can be omitted in which case it defaults to `~fmt::context`.
See `~fmt::arg` for lifetime considerations.
return {args...};
}
+/**
+ \rst
+ Constructs a `~fmt::format_arg_store` object that contains references
+ to arguments and can be implicitly converted to `~fmt::format_args`.
+ If ``format_str`` is a compile-time string then `make_args_checked` checks
+ its validity at compile time.
+ \endrst
+ */
+template <typename... Args, typename S, typename Char = char_t<S>>
+inline auto make_args_checked(const S& format_str,
+ const remove_reference_t<Args>&... args)
+ -> format_arg_store<buffer_context<Char>, remove_reference_t<Args>...> {
+ static_assert(
+ detail::count<(
+ std::is_base_of<detail::view, remove_reference_t<Args>>::value &&
+ std::is_reference<Args>::value)...>() == 0,
+ "passing views as lvalues is disallowed");
+ detail::check_format_string<Args...>(format_str);
+ return {args...};
+}
+
/**
\rst
Returns a named argument to be used in a formatting function. It should only
}
};
-/** An alias to ``basic_format_args<context>``. */
+#ifdef FMT_ARM_ABI_COMPATIBILITY
+/** An alias to ``basic_format_args<format_context>``. */
+// Separate types would result in shorter symbols but break ABI compatibility
+// between clang and gcc on ARM (#1919).
+using format_args = basic_format_args<format_context>;
+using wformat_args = basic_format_args<wformat_context>;
+#else
+// DEPRECATED! These are kept for ABI compatibility.
// It is a separate type rather than an alias to make symbols readable.
struct format_args : basic_format_args<format_context> {
template <typename... Args>
struct wformat_args : basic_format_args<wformat_context> {
using basic_format_args::basic_format_args;
};
-
-namespace detail {
-
-// Reports a compile-time error if S is not a valid format string.
-template <typename..., typename S, FMT_ENABLE_IF(!is_compile_string<S>::value)>
-FMT_INLINE void check_format_string(const S&) {
-#ifdef FMT_ENFORCE_COMPILE_STRING
- static_assert(is_compile_string<S>::value,
- "FMT_ENFORCE_COMPILE_STRING requires all format strings to use "
- "FMT_STRING.");
#endif
-}
-template <typename..., typename S, FMT_ENABLE_IF(is_compile_string<S>::value)>
-void check_format_string(S);
-template <typename... Args, typename S, typename Char = char_t<S>>
-inline format_arg_store<buffer_context<Char>, remove_reference_t<Args>...>
-make_args_checked(const S& format_str,
- const remove_reference_t<Args>&... args) {
- static_assert(count<(std::is_base_of<view, remove_reference_t<Args>>::value &&
- std::is_reference<Args>::value)...>() == 0,
- "passing views as lvalues is disallowed");
- check_format_string<Args...>(format_str);
- return {args...};
-}
+namespace detail {
template <typename Char, FMT_ENABLE_IF(!std::is_same<Char, char>::value)>
std::basic_string<Char> vformat(
FMT_API std::string vformat(string_view format_str, format_args args);
template <typename Char>
-typename FMT_BUFFER_CONTEXT(Char)::iterator vformat_to(
+void vformat_to(
buffer<Char>& buf, basic_string_view<Char> format_str,
- basic_format_args<FMT_BUFFER_CONTEXT(type_identity_t<Char>)> args);
+ basic_format_args<FMT_BUFFER_CONTEXT(type_identity_t<Char>)> args,
+ detail::locale_ref loc = {});
template <typename Char, typename Args,
FMT_ENABLE_IF(!std::is_same<Char, char>::value)>
/** Formats a string and writes the output to ``out``. */
// GCC 8 and earlier cannot handle std::back_insert_iterator<Container> with
// vformat_to<ArgFormatter>(...) overload, so SFINAE on iterator type instead.
-template <
- typename OutputIt, typename S, typename Char = char_t<S>,
- FMT_ENABLE_IF(detail::is_contiguous_back_insert_iterator<OutputIt>::value)>
-OutputIt vformat_to(
- OutputIt out, const S& format_str,
- basic_format_args<buffer_context<type_identity_t<Char>>> args) {
- auto& c = detail::get_container(out);
- detail::container_buffer<remove_reference_t<decltype(c)>> buf(c);
+template <typename OutputIt, typename S, typename Char = char_t<S>,
+ bool enable = detail::is_output_iterator<OutputIt, Char>::value>
+auto vformat_to(OutputIt out, const S& format_str,
+ basic_format_args<buffer_context<type_identity_t<Char>>> args)
+ -> typename std::enable_if<enable, OutputIt>::type {
+ decltype(detail::get_buffer<Char>(out)) buf(detail::get_buffer_init(out));
detail::vformat_to(buf, to_string_view(format_str), args);
- return out;
+ return detail::get_iterator(buf);
+}
+
+/**
+ \rst
+ Formats arguments, writes the result to the output iterator ``out`` and returns
+ the iterator past the end of the output range.
+
+ **Example**::
+
+ std::vector<char> out;
+ fmt::format_to(std::back_inserter(out), "{}", 42);
+ \endrst
+ */
+// We cannot use FMT_ENABLE_IF because of a bug in gcc 8.3.
+template <typename OutputIt, typename S, typename... Args,
+ bool enable = detail::is_output_iterator<OutputIt, char_t<S>>::value>
+inline auto format_to(OutputIt out, const S& format_str, Args&&... args) ->
+ typename std::enable_if<enable, OutputIt>::type {
+ const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...);
+ return vformat_to(out, to_string_view(format_str), vargs);
+}
+
+template <typename OutputIt> struct format_to_n_result {
+ /** Iterator past the end of the output range. */
+ OutputIt out;
+ /** Total (not truncated) output size. */
+ size_t size;
+};
+
+template <typename OutputIt, typename Char, typename... Args,
+ FMT_ENABLE_IF(detail::is_output_iterator<OutputIt, Char>::value)>
+inline format_to_n_result<OutputIt> vformat_to_n(
+ OutputIt out, size_t n, basic_string_view<Char> format_str,
+ basic_format_args<buffer_context<type_identity_t<Char>>> args) {
+ detail::iterator_buffer<OutputIt, Char, detail::fixed_buffer_traits> buf(out,
+ n);  // n is forwarded to fixed_buffer_traits as its limit
+ detail::vformat_to(buf, format_str, args);
+ return {buf.out(), buf.count()};  // fills format_to_n_result {out, size}
+}
+
+/**
+ \rst
+ Formats arguments, writes up to ``n`` characters of the result to the output
+ iterator ``out`` and returns the total output size and the iterator past the
+ end of the output range.
+ \endrst
+ */
+template <typename OutputIt, typename S, typename... Args,
+ bool enable = detail::is_output_iterator<OutputIt, char_t<S>>::value>
+inline auto format_to_n(OutputIt out, size_t n, const S& format_str,
+ const Args&... args) ->
+ typename std::enable_if<enable, format_to_n_result<OutputIt>>::type {
+ const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...);
+ return vformat_to_n(out, n, to_string_view(format_str), vargs);
}
-template <typename Container, typename S, typename... Args,
- FMT_ENABLE_IF(
- is_contiguous<Container>::value&& detail::is_string<S>::value)>
-inline std::back_insert_iterator<Container> format_to(
- std::back_insert_iterator<Container> out, const S& format_str,
- Args&&... args) {
- return vformat_to(out, to_string_view(format_str),
- detail::make_args_checked<Args...>(format_str, args...));
+/**
+ Returns the number of characters in the output of
+ ``format(format_str, args...)``.
+ */
+template <typename... Args>
+inline size_t formatted_size(string_view format_str, Args&&... args) {
+ const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...);
+ detail::counting_buffer<> buf;
+ detail::vformat_to(buf, format_str, vargs);
+ return buf.count();
}
template <typename S, typename Char = char_t<S>>
// std::basic_string<char_t<S>> to reduce the symbol size.
template <typename S, typename... Args, typename Char = char_t<S>>
FMT_INLINE std::basic_string<Char> format(const S& format_str, Args&&... args) {
- const auto& vargs = detail::make_args_checked<Args...>(format_str, args...);
+ const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...);
return detail::vformat(to_string_view(format_str), vargs);
}
*/
template <typename S, typename... Args, typename Char = char_t<S>>
inline void print(std::FILE* f, const S& format_str, Args&&... args) {
- const auto& vargs = detail::make_args_checked<Args...>(format_str, args...);
+ const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...);
return detail::is_unicode<Char>()
? vprint(f, to_string_view(format_str), vargs)
: detail::vprint_mojibake(f, to_string_view(format_str), vargs);
*/
template <typename S, typename... Args, typename Char = char_t<S>>
inline void print(const S& format_str, Args&&... args) {
- const auto& vargs = detail::make_args_checked<Args...>(format_str, args...);
+ const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...);
return detail::is_unicode<Char>()
? vprint(to_string_view(format_str), vargs)
: detail::vprint_mojibake(stdout, to_string_view(format_str),
#include <climits>
#include <cmath>
#include <cstdarg>
-#include <cstring> // for std::memmove
+#include <cstring> // std::memmove
#include <cwchar>
#include <exception>
-#include "format.h"
-#if !defined(FMT_STATIC_THOUSANDS_SEPARATOR)
+#ifndef FMT_STATIC_THOUSANDS_SEPARATOR
# include <locale>
#endif
#ifdef _WIN32
-# if !defined(NOMINMAX) && !defined(WIN32_LEAN_AND_MEAN)
-# define NOMINMAX
-# define WIN32_LEAN_AND_MEAN
-# include <windows.h>
-# undef WIN32_LEAN_AND_MEAN
-# undef NOMINMAX
-# else
-# include <windows.h>
-# endif
-# include <io.h>
+# include <io.h> // _isatty
#endif
-#ifdef _MSC_VER
-# pragma warning(push)
-# pragma warning(disable : 4702) // unreachable code
-#endif
+#include "format.h"
// Dummy implementations of strerror_r and strerror_s called if corresponding
// system functions are not available.
// ERANGE - buffer is not large enough to store the error message
// other - failure
// Buffer should be at least of size 1.
-FMT_FUNC int safe_strerror(int error_code, char*& buffer,
- size_t buffer_size) FMT_NOEXCEPT {
+inline int safe_strerror(int error_code, char*& buffer,
+ size_t buffer_size) FMT_NOEXCEPT {
FMT_ASSERT(buffer != nullptr && buffer_size != 0, "invalid buffer");
class dispatcher {
// Report error code making sure that the output fits into
// inline_buffer_size to avoid dynamic memory allocation and potential
// bad_alloc.
- out.resize(0);
+ out.try_resize(0);
static const char SEP[] = ": ";
static const char ERROR_STR[] = "error ";
// Subtract 2 to account for terminating null characters in SEP and ERROR_STR.
++error_code_size;
}
error_code_size += detail::to_unsigned(detail::count_digits(abs_value));
- auto it = std::back_inserter(out);
+ auto it = buffer_appender<char>(out);
if (message.size() <= inline_buffer_size - error_code_size)
format_to(it, "{}{}", message, SEP);
format_to(it, "{}{}", ERROR_STR, error_code);
}
// A wrapper around fwrite that throws on error.
-FMT_FUNC void fwrite_fully(const void* ptr, size_t size, size_t count,
- FILE* stream) {
+inline void fwrite_fully(const void* ptr, size_t size, size_t count,
+ FILE* stream) {
size_t written = std::fwrite(ptr, size, count, stream);
if (written < count) FMT_THROW(system_error(errno, "cannot write to file"));
}
template <typename T>
const typename basic_data<T>::digit_pair basic_data<T>::digits[] = {
- {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'},
- {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'},
- {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'},
- {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'},
- {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'},
- {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
- {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'},
- {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'},
- {'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'},
- {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'},
- {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'},
- {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
- {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'},
- {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'},
- {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'},
- {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'},
- {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'},
- {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
- {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'},
- {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}};
+ {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'},
+ {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'},
+ {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'},
+ {'1', '8'}, {'1', '9'}, {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'},
+ {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
+ {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'},
+ {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'},
+ {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'},
+ {'4', '8'}, {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'},
+ {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
+ {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'},
+ {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, {'7', '0'}, {'7', '1'},
+ {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'},
+ {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'},
+ {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
+ {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'},
+ {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}};
template <typename T>
const char basic_data<T>::hex_digits[] = "0123456789abcdef";
template <typename T>
const uint32_t basic_data<T>::zero_or_powers_of_10_32[] = {0,
FMT_POWERS_OF_10(1)};
-
template <typename T>
const uint64_t basic_data<T>::zero_or_powers_of_10_64[] = {
0, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ULL),
10000000000000000000ULL};
+template <typename T>
+const uint32_t basic_data<T>::zero_or_powers_of_10_32_new[] = {
+ 0, 0, FMT_POWERS_OF_10(1)};  // one more leading 0 than zero_or_powers_of_10_32
+
+template <typename T>
+const uint64_t basic_data<T>::zero_or_powers_of_10_64_new[] = {
+ 0, 0, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ULL),  // extra leading 0
+ 10000000000000000000ULL};
+
// Normalized 64-bit significands of pow(10, k), for k = -348, -340, ..., 340.
// These are generated by support/compute-powers.py.
template <typename T>
-const uint64_t basic_data<T>::pow10_significands[] = {
+const uint64_t basic_data<T>::grisu_pow10_significands[] = {
0xfa8fd5a0081c0288, 0xbaaee17fa23ebf76, 0x8b16fb203055ac76,
0xcf42894a5dce35ea, 0x9a6bb0aa55653b2d, 0xe61acf033d1a45df,
0xab70fe17c79ac6ca, 0xff77b1fcbebcdc4f, 0xbe5691ef416bd60c,
// Binary exponents of pow(10, k), for k = -348, -340, ..., 340, corresponding
// to significands above.
template <typename T>
-const int16_t basic_data<T>::pow10_exponents[] = {
+const int16_t basic_data<T>::grisu_pow10_exponents[] = {
-1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954,
-927, -901, -874, -847, -821, -794, -768, -741, -715, -688, -661,
-635, -608, -582, -555, -529, -502, -475, -449, -422, -396, -369,
534, 561, 588, 614, 641, 667, 694, 720, 747, 774, 800,
827, 853, 880, 907, 933, 960, 986, 1013, 1039, 1066};
+template <typename T>
+const divtest_table_entry<uint32_t> basic_data<T>::divtest_table_for_pow5_32[] =
+ {{0x00000001, 0xffffffff}, {0xcccccccd, 0x33333333},
+ {0xc28f5c29, 0x0a3d70a3}, {0x26e978d5, 0x020c49ba},
+ {0x3afb7e91, 0x0068db8b}, {0x0bcbe61d, 0x0014f8b5},
+ {0x68c26139, 0x000431bd}, {0xae8d46a5, 0x0000d6bf},
+ {0x22e90e21, 0x00002af3}, {0x3a2e9c6d, 0x00000897},
+ {0x3ed61f49, 0x000001b7}};
+
+template <typename T>
+const divtest_table_entry<uint64_t> basic_data<T>::divtest_table_for_pow5_64[] =
+ {{0x0000000000000001, 0xffffffffffffffff},
+ {0xcccccccccccccccd, 0x3333333333333333},
+ {0x8f5c28f5c28f5c29, 0x0a3d70a3d70a3d70},
+ {0x1cac083126e978d5, 0x020c49ba5e353f7c},
+ {0xd288ce703afb7e91, 0x0068db8bac710cb2},
+ {0x5d4e8fb00bcbe61d, 0x0014f8b588e368f0},
+ {0x790fb65668c26139, 0x000431bde82d7b63},
+ {0xe5032477ae8d46a5, 0x0000d6bf94d5e57a},
+ {0xc767074b22e90e21, 0x00002af31dc46118},
+ {0x8e47ce423a2e9c6d, 0x0000089705f4136b},
+ {0x4fa7f60d3ed61f49, 0x000001b7cdfd9d7b},
+ {0x0fee64690c913975, 0x00000057f5ff85e5},
+ {0x3662e0e1cf503eb1, 0x000000119799812d},
+ {0xa47a2cf9f6433fbd, 0x0000000384b84d09},
+ {0x54186f653140a659, 0x00000000b424dc35},
+ {0x7738164770402145, 0x0000000024075f3d},
+ {0xe4a4d1417cd9a041, 0x000000000734aca5},
+ {0xc75429d9e5c5200d, 0x000000000170ef54},
+ {0xc1773b91fac10669, 0x000000000049c977},
+ {0x26b172506559ce15, 0x00000000000ec1e4},
+ {0xd489e3a9addec2d1, 0x000000000002f394},
+ {0x90e860bb892c8d5d, 0x000000000000971d},
+ {0x502e79bf1b6f4f79, 0x0000000000001e39},
+ {0xdcd618596be30fe5, 0x000000000000060b}};
+
+template <typename T>
+const uint64_t basic_data<T>::dragonbox_pow10_significands_64[] = {
+ 0x81ceb32c4b43fcf5, 0xa2425ff75e14fc32, 0xcad2f7f5359a3b3f,
+ 0xfd87b5f28300ca0e, 0x9e74d1b791e07e49, 0xc612062576589ddb,
+ 0xf79687aed3eec552, 0x9abe14cd44753b53, 0xc16d9a0095928a28,
+ 0xf1c90080baf72cb2, 0x971da05074da7bef, 0xbce5086492111aeb,
+ 0xec1e4a7db69561a6, 0x9392ee8e921d5d08, 0xb877aa3236a4b44a,
+ 0xe69594bec44de15c, 0x901d7cf73ab0acda, 0xb424dc35095cd810,
+ 0xe12e13424bb40e14, 0x8cbccc096f5088cc, 0xafebff0bcb24aaff,
+ 0xdbe6fecebdedd5bf, 0x89705f4136b4a598, 0xabcc77118461cefd,
+ 0xd6bf94d5e57a42bd, 0x8637bd05af6c69b6, 0xa7c5ac471b478424,
+ 0xd1b71758e219652c, 0x83126e978d4fdf3c, 0xa3d70a3d70a3d70b,
+ 0xcccccccccccccccd, 0x8000000000000000, 0xa000000000000000,
+ 0xc800000000000000, 0xfa00000000000000, 0x9c40000000000000,
+ 0xc350000000000000, 0xf424000000000000, 0x9896800000000000,
+ 0xbebc200000000000, 0xee6b280000000000, 0x9502f90000000000,
+ 0xba43b74000000000, 0xe8d4a51000000000, 0x9184e72a00000000,
+ 0xb5e620f480000000, 0xe35fa931a0000000, 0x8e1bc9bf04000000,
+ 0xb1a2bc2ec5000000, 0xde0b6b3a76400000, 0x8ac7230489e80000,
+ 0xad78ebc5ac620000, 0xd8d726b7177a8000, 0x878678326eac9000,
+ 0xa968163f0a57b400, 0xd3c21bcecceda100, 0x84595161401484a0,
+ 0xa56fa5b99019a5c8, 0xcecb8f27f4200f3a, 0x813f3978f8940984,
+ 0xa18f07d736b90be5, 0xc9f2c9cd04674ede, 0xfc6f7c4045812296,
+ 0x9dc5ada82b70b59d, 0xc5371912364ce305, 0xf684df56c3e01bc6,
+ 0x9a130b963a6c115c, 0xc097ce7bc90715b3, 0xf0bdc21abb48db20,
+ 0x96769950b50d88f4, 0xbc143fa4e250eb31, 0xeb194f8e1ae525fd,
+ 0x92efd1b8d0cf37be, 0xb7abc627050305ad, 0xe596b7b0c643c719,
+ 0x8f7e32ce7bea5c6f, 0xb35dbf821ae4f38b, 0xe0352f62a19e306e};
+
+template <typename T>
+const uint128_wrapper basic_data<T>::dragonbox_pow10_significands_128[] = {
+#if FMT_USE_FULL_CACHE_DRAGONBOX
+ {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b},
+ {0x9faacf3df73609b1, 0x77b191618c54e9ad},
+ {0xc795830d75038c1d, 0xd59df5b9ef6a2418},
+ {0xf97ae3d0d2446f25, 0x4b0573286b44ad1e},
+ {0x9becce62836ac577, 0x4ee367f9430aec33},
+ {0xc2e801fb244576d5, 0x229c41f793cda740},
+ {0xf3a20279ed56d48a, 0x6b43527578c11110},
+ {0x9845418c345644d6, 0x830a13896b78aaaa},
+ {0xbe5691ef416bd60c, 0x23cc986bc656d554},
+ {0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa9},
+ {0x94b3a202eb1c3f39, 0x7bf7d71432f3d6aa},
+ {0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc54},
+ {0xe858ad248f5c22c9, 0xd1b3400f8f9cff69},
+ {0x91376c36d99995be, 0x23100809b9c21fa2},
+ {0xb58547448ffffb2d, 0xabd40a0c2832a78b},
+ {0xe2e69915b3fff9f9, 0x16c90c8f323f516d},
+ {0x8dd01fad907ffc3b, 0xae3da7d97f6792e4},
+ {0xb1442798f49ffb4a, 0x99cd11cfdf41779d},
+ {0xdd95317f31c7fa1d, 0x40405643d711d584},
+ {0x8a7d3eef7f1cfc52, 0x482835ea666b2573},
+ {0xad1c8eab5ee43b66, 0xda3243650005eed0},
+ {0xd863b256369d4a40, 0x90bed43e40076a83},
+ {0x873e4f75e2224e68, 0x5a7744a6e804a292},
+ {0xa90de3535aaae202, 0x711515d0a205cb37},
+ {0xd3515c2831559a83, 0x0d5a5b44ca873e04},
+ {0x8412d9991ed58091, 0xe858790afe9486c3},
+ {0xa5178fff668ae0b6, 0x626e974dbe39a873},
+ {0xce5d73ff402d98e3, 0xfb0a3d212dc81290},
+ {0x80fa687f881c7f8e, 0x7ce66634bc9d0b9a},
+ {0xa139029f6a239f72, 0x1c1fffc1ebc44e81},
+ {0xc987434744ac874e, 0xa327ffb266b56221},
+ {0xfbe9141915d7a922, 0x4bf1ff9f0062baa9},
+ {0x9d71ac8fada6c9b5, 0x6f773fc3603db4aa},
+ {0xc4ce17b399107c22, 0xcb550fb4384d21d4},
+ {0xf6019da07f549b2b, 0x7e2a53a146606a49},
+ {0x99c102844f94e0fb, 0x2eda7444cbfc426e},
+ {0xc0314325637a1939, 0xfa911155fefb5309},
+ {0xf03d93eebc589f88, 0x793555ab7eba27cb},
+ {0x96267c7535b763b5, 0x4bc1558b2f3458df},
+ {0xbbb01b9283253ca2, 0x9eb1aaedfb016f17},
+ {0xea9c227723ee8bcb, 0x465e15a979c1cadd},
+ {0x92a1958a7675175f, 0x0bfacd89ec191eca},
+ {0xb749faed14125d36, 0xcef980ec671f667c},
+ {0xe51c79a85916f484, 0x82b7e12780e7401b},
+ {0x8f31cc0937ae58d2, 0xd1b2ecb8b0908811},
+ {0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa16},
+ {0xdfbdcece67006ac9, 0x67a791e093e1d49b},
+ {0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e1},
+ {0xaecc49914078536d, 0x58fae9f773886e19},
+ {0xda7f5bf590966848, 0xaf39a475506a899f},
+ {0x888f99797a5e012d, 0x6d8406c952429604},
+ {0xaab37fd7d8f58178, 0xc8e5087ba6d33b84},
+ {0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a65},
+ {0x855c3be0a17fcd26, 0x5cf2eea09a550680},
+ {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f},
+ {0xd0601d8efc57b08b, 0xf13b94daf124da27},
+ {0x823c12795db6ce57, 0x76c53d08d6b70859},
+ {0xa2cb1717b52481ed, 0x54768c4b0c64ca6f},
+ {0xcb7ddcdda26da268, 0xa9942f5dcf7dfd0a},
+ {0xfe5d54150b090b02, 0xd3f93b35435d7c4d},
+ {0x9efa548d26e5a6e1, 0xc47bc5014a1a6db0},
+ {0xc6b8e9b0709f109a, 0x359ab6419ca1091c},
+ {0xf867241c8cc6d4c0, 0xc30163d203c94b63},
+ {0x9b407691d7fc44f8, 0x79e0de63425dcf1e},
+ {0xc21094364dfb5636, 0x985915fc12f542e5},
+ {0xf294b943e17a2bc4, 0x3e6f5b7b17b2939e},
+ {0x979cf3ca6cec5b5a, 0xa705992ceecf9c43},
+ {0xbd8430bd08277231, 0x50c6ff782a838354},
+ {0xece53cec4a314ebd, 0xa4f8bf5635246429},
+ {0x940f4613ae5ed136, 0x871b7795e136be9a},
+ {0xb913179899f68584, 0x28e2557b59846e40},
+ {0xe757dd7ec07426e5, 0x331aeada2fe589d0},
+ {0x9096ea6f3848984f, 0x3ff0d2c85def7622},
+ {0xb4bca50b065abe63, 0x0fed077a756b53aa},
+ {0xe1ebce4dc7f16dfb, 0xd3e8495912c62895},
+ {0x8d3360f09cf6e4bd, 0x64712dd7abbbd95d},
+ {0xb080392cc4349dec, 0xbd8d794d96aacfb4},
+ {0xdca04777f541c567, 0xecf0d7a0fc5583a1},
+ {0x89e42caaf9491b60, 0xf41686c49db57245},
+ {0xac5d37d5b79b6239, 0x311c2875c522ced6},
+ {0xd77485cb25823ac7, 0x7d633293366b828c},
+ {0x86a8d39ef77164bc, 0xae5dff9c02033198},
+ {0xa8530886b54dbdeb, 0xd9f57f830283fdfd},
+ {0xd267caa862a12d66, 0xd072df63c324fd7c},
+ {0x8380dea93da4bc60, 0x4247cb9e59f71e6e},
+ {0xa46116538d0deb78, 0x52d9be85f074e609},
+ {0xcd795be870516656, 0x67902e276c921f8c},
+ {0x806bd9714632dff6, 0x00ba1cd8a3db53b7},
+ {0xa086cfcd97bf97f3, 0x80e8a40eccd228a5},
+ {0xc8a883c0fdaf7df0, 0x6122cd128006b2ce},
+ {0xfad2a4b13d1b5d6c, 0x796b805720085f82},
+ {0x9cc3a6eec6311a63, 0xcbe3303674053bb1},
+ {0xc3f490aa77bd60fc, 0xbedbfc4411068a9d},
+ {0xf4f1b4d515acb93b, 0xee92fb5515482d45},
+ {0x991711052d8bf3c5, 0x751bdd152d4d1c4b},
+ {0xbf5cd54678eef0b6, 0xd262d45a78a0635e},
+ {0xef340a98172aace4, 0x86fb897116c87c35},
+ {0x9580869f0e7aac0e, 0xd45d35e6ae3d4da1},
+ {0xbae0a846d2195712, 0x8974836059cca10a},
+ {0xe998d258869facd7, 0x2bd1a438703fc94c},
+ {0x91ff83775423cc06, 0x7b6306a34627ddd0},
+ {0xb67f6455292cbf08, 0x1a3bc84c17b1d543},
+ {0xe41f3d6a7377eeca, 0x20caba5f1d9e4a94},
+ {0x8e938662882af53e, 0x547eb47b7282ee9d},
+ {0xb23867fb2a35b28d, 0xe99e619a4f23aa44},
+ {0xdec681f9f4c31f31, 0x6405fa00e2ec94d5},
+ {0x8b3c113c38f9f37e, 0xde83bc408dd3dd05},
+ {0xae0b158b4738705e, 0x9624ab50b148d446},
+ {0xd98ddaee19068c76, 0x3badd624dd9b0958},
+ {0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d7},
+ {0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4d},
+ {0xd47487cc8470652b, 0x7647c32000696720},
+ {0x84c8d4dfd2c63f3b, 0x29ecd9f40041e074},
+ {0xa5fb0a17c777cf09, 0xf468107100525891},
+ {0xcf79cc9db955c2cc, 0x7182148d4066eeb5},
+ {0x81ac1fe293d599bf, 0xc6f14cd848405531},
+ {0xa21727db38cb002f, 0xb8ada00e5a506a7d},
+ {0xca9cf1d206fdc03b, 0xa6d90811f0e4851d},
+ {0xfd442e4688bd304a, 0x908f4a166d1da664},
+ {0x9e4a9cec15763e2e, 0x9a598e4e043287ff},
+ {0xc5dd44271ad3cdba, 0x40eff1e1853f29fe},
+ {0xf7549530e188c128, 0xd12bee59e68ef47d},
+ {0x9a94dd3e8cf578b9, 0x82bb74f8301958cf},
+ {0xc13a148e3032d6e7, 0xe36a52363c1faf02},
+ {0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac2},
+ {0x96f5600f15a7b7e5, 0x29ab103a5ef8c0ba},
+ {0xbcb2b812db11a5de, 0x7415d448f6b6f0e8},
+ {0xebdf661791d60f56, 0x111b495b3464ad22},
+ {0x936b9fcebb25c995, 0xcab10dd900beec35},
+ {0xb84687c269ef3bfb, 0x3d5d514f40eea743},
+ {0xe65829b3046b0afa, 0x0cb4a5a3112a5113},
+ {0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ac},
+ {0xb3f4e093db73a093, 0x59ed216765690f57},
+ {0xe0f218b8d25088b8, 0x306869c13ec3532d},
+ {0x8c974f7383725573, 0x1e414218c73a13fc},
+ {0xafbd2350644eeacf, 0xe5d1929ef90898fb},
+ {0xdbac6c247d62a583, 0xdf45f746b74abf3a},
+ {0x894bc396ce5da772, 0x6b8bba8c328eb784},
+ {0xab9eb47c81f5114f, 0x066ea92f3f326565},
+ {0xd686619ba27255a2, 0xc80a537b0efefebe},
+ {0x8613fd0145877585, 0xbd06742ce95f5f37},
+ {0xa798fc4196e952e7, 0x2c48113823b73705},
+ {0xd17f3b51fca3a7a0, 0xf75a15862ca504c6},
+ {0x82ef85133de648c4, 0x9a984d73dbe722fc},
+ {0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebbb},
+ {0xcc963fee10b7d1b3, 0x318df905079926a9},
+ {0xffbbcfe994e5c61f, 0xfdf17746497f7053},
+ {0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa634},
+ {0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc1},
+ {0xf9bd690a1b68637b, 0x3dfdce7aa3c673b1},
+ {0x9c1661a651213e2d, 0x06bea10ca65c084f},
+ {0xc31bfa0fe5698db8, 0x486e494fcff30a63},
+ {0xf3e2f893dec3f126, 0x5a89dba3c3efccfb},
+ {0x986ddb5c6b3a76b7, 0xf89629465a75e01d},
+ {0xbe89523386091465, 0xf6bbb397f1135824},
+ {0xee2ba6c0678b597f, 0x746aa07ded582e2d},
+ {0x94db483840b717ef, 0xa8c2a44eb4571cdd},
+ {0xba121a4650e4ddeb, 0x92f34d62616ce414},
+ {0xe896a0d7e51e1566, 0x77b020baf9c81d18},
+ {0x915e2486ef32cd60, 0x0ace1474dc1d122f},
+ {0xb5b5ada8aaff80b8, 0x0d819992132456bb},
+ {0xe3231912d5bf60e6, 0x10e1fff697ed6c6a},
+ {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2},
+ {0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb3},
+ {0xddd0467c64bce4a0, 0xac7cb3f6d05ddbdf},
+ {0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96c},
+ {0xad4ab7112eb3929d, 0x86c16c98d2c953c7},
+ {0xd89d64d57a607744, 0xe871c7bf077ba8b8},
+ {0x87625f056c7c4a8b, 0x11471cd764ad4973},
+ {0xa93af6c6c79b5d2d, 0xd598e40d3dd89bd0},
+ {0xd389b47879823479, 0x4aff1d108d4ec2c4},
+ {0x843610cb4bf160cb, 0xcedf722a585139bb},
+ {0xa54394fe1eedb8fe, 0xc2974eb4ee658829},
+ {0xce947a3da6a9273e, 0x733d226229feea33},
+ {0x811ccc668829b887, 0x0806357d5a3f5260},
+ {0xa163ff802a3426a8, 0xca07c2dcb0cf26f8},
+ {0xc9bcff6034c13052, 0xfc89b393dd02f0b6},
+ {0xfc2c3f3841f17c67, 0xbbac2078d443ace3},
+ {0x9d9ba7832936edc0, 0xd54b944b84aa4c0e},
+ {0xc5029163f384a931, 0x0a9e795e65d4df12},
+ {0xf64335bcf065d37d, 0x4d4617b5ff4a16d6},
+ {0x99ea0196163fa42e, 0x504bced1bf8e4e46},
+ {0xc06481fb9bcf8d39, 0xe45ec2862f71e1d7},
+ {0xf07da27a82c37088, 0x5d767327bb4e5a4d},
+ {0x964e858c91ba2655, 0x3a6a07f8d510f870},
+ {0xbbe226efb628afea, 0x890489f70a55368c},
+ {0xeadab0aba3b2dbe5, 0x2b45ac74ccea842f},
+ {0x92c8ae6b464fc96f, 0x3b0b8bc90012929e},
+ {0xb77ada0617e3bbcb, 0x09ce6ebb40173745},
+ {0xe55990879ddcaabd, 0xcc420a6a101d0516},
+ {0x8f57fa54c2a9eab6, 0x9fa946824a12232e},
+ {0xb32df8e9f3546564, 0x47939822dc96abfa},
+ {0xdff9772470297ebd, 0x59787e2b93bc56f8},
+ {0x8bfbea76c619ef36, 0x57eb4edb3c55b65b},
+ {0xaefae51477a06b03, 0xede622920b6b23f2},
+ {0xdab99e59958885c4, 0xe95fab368e45ecee},
+ {0x88b402f7fd75539b, 0x11dbcb0218ebb415},
+ {0xaae103b5fcd2a881, 0xd652bdc29f26a11a},
+ {0xd59944a37c0752a2, 0x4be76d3346f04960},
+ {0x857fcae62d8493a5, 0x6f70a4400c562ddc},
+ {0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb953},
+ {0xd097ad07a71f26b2, 0x7e2000a41346a7a8},
+ {0x825ecc24c873782f, 0x8ed400668c0c28c9},
+ {0xa2f67f2dfa90563b, 0x728900802f0f32fb},
+ {0xcbb41ef979346bca, 0x4f2b40a03ad2ffba},
+ {0xfea126b7d78186bc, 0xe2f610c84987bfa9},
+ {0x9f24b832e6b0f436, 0x0dd9ca7d2df4d7ca},
+ {0xc6ede63fa05d3143, 0x91503d1c79720dbc},
+ {0xf8a95fcf88747d94, 0x75a44c6397ce912b},
+ {0x9b69dbe1b548ce7c, 0xc986afbe3ee11abb},
+ {0xc24452da229b021b, 0xfbe85badce996169},
+ {0xf2d56790ab41c2a2, 0xfae27299423fb9c4},
+ {0x97c560ba6b0919a5, 0xdccd879fc967d41b},
+ {0xbdb6b8e905cb600f, 0x5400e987bbc1c921},
+ {0xed246723473e3813, 0x290123e9aab23b69},
+ {0x9436c0760c86e30b, 0xf9a0b6720aaf6522},
+ {0xb94470938fa89bce, 0xf808e40e8d5b3e6a},
+ {0xe7958cb87392c2c2, 0xb60b1d1230b20e05},
+ {0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c3},
+ {0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af4},
+ {0xe2280b6c20dd5232, 0x25c6da63c38de1b1},
+ {0x8d590723948a535f, 0x579c487e5a38ad0f},
+ {0xb0af48ec79ace837, 0x2d835a9df0c6d852},
+ {0xdcdb1b2798182244, 0xf8e431456cf88e66},
+ {0x8a08f0f8bf0f156b, 0x1b8e9ecb641b5900},
+ {0xac8b2d36eed2dac5, 0xe272467e3d222f40},
+ {0xd7adf884aa879177, 0x5b0ed81dcc6abb10},
+ {0x86ccbb52ea94baea, 0x98e947129fc2b4ea},
+ {0xa87fea27a539e9a5, 0x3f2398d747b36225},
+ {0xd29fe4b18e88640e, 0x8eec7f0d19a03aae},
+ {0x83a3eeeef9153e89, 0x1953cf68300424ad},
+ {0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd8},
+ {0xcdb02555653131b6, 0x3792f412cb06794e},
+ {0x808e17555f3ebf11, 0xe2bbd88bbee40bd1},
+ {0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec5},
+ {0xc8de047564d20a8b, 0xf245825a5a445276},
+ {0xfb158592be068d2e, 0xeed6e2f0f0d56713},
+ {0x9ced737bb6c4183d, 0x55464dd69685606c},
+ {0xc428d05aa4751e4c, 0xaa97e14c3c26b887},
+ {0xf53304714d9265df, 0xd53dd99f4b3066a9},
+ {0x993fe2c6d07b7fab, 0xe546a8038efe402a},
+ {0xbf8fdb78849a5f96, 0xde98520472bdd034},
+ {0xef73d256a5c0f77c, 0x963e66858f6d4441},
+ {0x95a8637627989aad, 0xdde7001379a44aa9},
+ {0xbb127c53b17ec159, 0x5560c018580d5d53},
+ {0xe9d71b689dde71af, 0xaab8f01e6e10b4a7},
+ {0x9226712162ab070d, 0xcab3961304ca70e9},
+ {0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d23},
+ {0xe45c10c42a2b3b05, 0x8cb89a7db77c506b},
+ {0x8eb98a7a9a5b04e3, 0x77f3608e92adb243},
+ {0xb267ed1940f1c61c, 0x55f038b237591ed4},
+ {0xdf01e85f912e37a3, 0x6b6c46dec52f6689},
+ {0x8b61313bbabce2c6, 0x2323ac4b3b3da016},
+ {0xae397d8aa96c1b77, 0xabec975e0a0d081b},
+ {0xd9c7dced53c72255, 0x96e7bd358c904a22},
+ {0x881cea14545c7575, 0x7e50d64177da2e55},
+ {0xaa242499697392d2, 0xdde50bd1d5d0b9ea},
+ {0xd4ad2dbfc3d07787, 0x955e4ec64b44e865},
+ {0x84ec3c97da624ab4, 0xbd5af13bef0b113f},
+ {0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58f},
+ {0xcfb11ead453994ba, 0x67de18eda5814af3},
+ {0x81ceb32c4b43fcf4, 0x80eacf948770ced8},
+ {0xa2425ff75e14fc31, 0xa1258379a94d028e},
+ {0xcad2f7f5359a3b3e, 0x096ee45813a04331},
+ {0xfd87b5f28300ca0d, 0x8bca9d6e188853fd},
+ {0x9e74d1b791e07e48, 0x775ea264cf55347e},
+ {0xc612062576589dda, 0x95364afe032a819e},
+ {0xf79687aed3eec551, 0x3a83ddbd83f52205},
+ {0x9abe14cd44753b52, 0xc4926a9672793543},
+ {0xc16d9a0095928a27, 0x75b7053c0f178294},
+ {0xf1c90080baf72cb1, 0x5324c68b12dd6339},
+ {0x971da05074da7bee, 0xd3f6fc16ebca5e04},
+ {0xbce5086492111aea, 0x88f4bb1ca6bcf585},
+ {0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6},
+ {0x9392ee8e921d5d07, 0x3aff322e62439fd0},
+ {0xb877aa3236a4b449, 0x09befeb9fad487c3},
+ {0xe69594bec44de15b, 0x4c2ebe687989a9b4},
+ {0x901d7cf73ab0acd9, 0x0f9d37014bf60a11},
+ {0xb424dc35095cd80f, 0x538484c19ef38c95},
+ {0xe12e13424bb40e13, 0x2865a5f206b06fba},
+ {0x8cbccc096f5088cb, 0xf93f87b7442e45d4},
+ {0xafebff0bcb24aafe, 0xf78f69a51539d749},
+ {0xdbe6fecebdedd5be, 0xb573440e5a884d1c},
+ {0x89705f4136b4a597, 0x31680a88f8953031},
+ {0xabcc77118461cefc, 0xfdc20d2b36ba7c3e},
+ {0xd6bf94d5e57a42bc, 0x3d32907604691b4d},
+ {0x8637bd05af6c69b5, 0xa63f9a49c2c1b110},
+ {0xa7c5ac471b478423, 0x0fcf80dc33721d54},
+ {0xd1b71758e219652b, 0xd3c36113404ea4a9},
+ {0x83126e978d4fdf3b, 0x645a1cac083126ea},
+ {0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4},
+ {0xcccccccccccccccc, 0xcccccccccccccccd},
+ {0x8000000000000000, 0x0000000000000000},
+ {0xa000000000000000, 0x0000000000000000},
+ {0xc800000000000000, 0x0000000000000000},
+ {0xfa00000000000000, 0x0000000000000000},
+ {0x9c40000000000000, 0x0000000000000000},
+ {0xc350000000000000, 0x0000000000000000},
+ {0xf424000000000000, 0x0000000000000000},
+ {0x9896800000000000, 0x0000000000000000},
+ {0xbebc200000000000, 0x0000000000000000},
+ {0xee6b280000000000, 0x0000000000000000},
+ {0x9502f90000000000, 0x0000000000000000},
+ {0xba43b74000000000, 0x0000000000000000},
+ {0xe8d4a51000000000, 0x0000000000000000},
+ {0x9184e72a00000000, 0x0000000000000000},
+ {0xb5e620f480000000, 0x0000000000000000},
+ {0xe35fa931a0000000, 0x0000000000000000},
+ {0x8e1bc9bf04000000, 0x0000000000000000},
+ {0xb1a2bc2ec5000000, 0x0000000000000000},
+ {0xde0b6b3a76400000, 0x0000000000000000},
+ {0x8ac7230489e80000, 0x0000000000000000},
+ {0xad78ebc5ac620000, 0x0000000000000000},
+ {0xd8d726b7177a8000, 0x0000000000000000},
+ {0x878678326eac9000, 0x0000000000000000},
+ {0xa968163f0a57b400, 0x0000000000000000},
+ {0xd3c21bcecceda100, 0x0000000000000000},
+ {0x84595161401484a0, 0x0000000000000000},
+ {0xa56fa5b99019a5c8, 0x0000000000000000},
+ {0xcecb8f27f4200f3a, 0x0000000000000000},
+ {0x813f3978f8940984, 0x4000000000000000},
+ {0xa18f07d736b90be5, 0x5000000000000000},
+ {0xc9f2c9cd04674ede, 0xa400000000000000},
+ {0xfc6f7c4045812296, 0x4d00000000000000},
+ {0x9dc5ada82b70b59d, 0xf020000000000000},
+ {0xc5371912364ce305, 0x6c28000000000000},
+ {0xf684df56c3e01bc6, 0xc732000000000000},
+ {0x9a130b963a6c115c, 0x3c7f400000000000},
+ {0xc097ce7bc90715b3, 0x4b9f100000000000},
+ {0xf0bdc21abb48db20, 0x1e86d40000000000},
+ {0x96769950b50d88f4, 0x1314448000000000},
+ {0xbc143fa4e250eb31, 0x17d955a000000000},
+ {0xeb194f8e1ae525fd, 0x5dcfab0800000000},
+ {0x92efd1b8d0cf37be, 0x5aa1cae500000000},
+ {0xb7abc627050305ad, 0xf14a3d9e40000000},
+ {0xe596b7b0c643c719, 0x6d9ccd05d0000000},
+ {0x8f7e32ce7bea5c6f, 0xe4820023a2000000},
+ {0xb35dbf821ae4f38b, 0xdda2802c8a800000},
+ {0xe0352f62a19e306e, 0xd50b2037ad200000},
+ {0x8c213d9da502de45, 0x4526f422cc340000},
+ {0xaf298d050e4395d6, 0x9670b12b7f410000},
+ {0xdaf3f04651d47b4c, 0x3c0cdd765f114000},
+ {0x88d8762bf324cd0f, 0xa5880a69fb6ac800},
+ {0xab0e93b6efee0053, 0x8eea0d047a457a00},
+ {0xd5d238a4abe98068, 0x72a4904598d6d880},
+ {0x85a36366eb71f041, 0x47a6da2b7f864750},
+ {0xa70c3c40a64e6c51, 0x999090b65f67d924},
+ {0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d},
+ {0x82818f1281ed449f, 0xbff8f10e7a8921a4},
+ {0xa321f2d7226895c7, 0xaff72d52192b6a0d},
+ {0xcbea6f8ceb02bb39, 0x9bf4f8a69f764490},
+ {0xfee50b7025c36a08, 0x02f236d04753d5b4},
+ {0x9f4f2726179a2245, 0x01d762422c946590},
+ {0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef5},
+ {0xf8ebad2b84e0d58b, 0xd2e0898765a7deb2},
+ {0x9b934c3b330c8577, 0x63cc55f49f88eb2f},
+ {0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fb},
+ {0xf316271c7fc3908a, 0x8bef464e3945ef7a},
+ {0x97edd871cfda3a56, 0x97758bf0e3cbb5ac},
+ {0xbde94e8e43d0c8ec, 0x3d52eeed1cbea317},
+ {0xed63a231d4c4fb27, 0x4ca7aaa863ee4bdd},
+ {0x945e455f24fb1cf8, 0x8fe8caa93e74ef6a},
+ {0xb975d6b6ee39e436, 0xb3e2fd538e122b44},
+ {0xe7d34c64a9c85d44, 0x60dbbca87196b616},
+ {0x90e40fbeea1d3a4a, 0xbc8955e946fe31cd},
+ {0xb51d13aea4a488dd, 0x6babab6398bdbe41},
+ {0xe264589a4dcdab14, 0xc696963c7eed2dd1},
+ {0x8d7eb76070a08aec, 0xfc1e1de5cf543ca2},
+ {0xb0de65388cc8ada8, 0x3b25a55f43294bcb},
+ {0xdd15fe86affad912, 0x49ef0eb713f39ebe},
+ {0x8a2dbf142dfcc7ab, 0x6e3569326c784337},
+ {0xacb92ed9397bf996, 0x49c2c37f07965404},
+ {0xd7e77a8f87daf7fb, 0xdc33745ec97be906},
+ {0x86f0ac99b4e8dafd, 0x69a028bb3ded71a3},
+ {0xa8acd7c0222311bc, 0xc40832ea0d68ce0c},
+ {0xd2d80db02aabd62b, 0xf50a3fa490c30190},
+ {0x83c7088e1aab65db, 0x792667c6da79e0fa},
+ {0xa4b8cab1a1563f52, 0x577001b891185938},
+ {0xcde6fd5e09abcf26, 0xed4c0226b55e6f86},
+ {0x80b05e5ac60b6178, 0x544f8158315b05b4},
+ {0xa0dc75f1778e39d6, 0x696361ae3db1c721},
+ {0xc913936dd571c84c, 0x03bc3a19cd1e38e9},
+ {0xfb5878494ace3a5f, 0x04ab48a04065c723},
+ {0x9d174b2dcec0e47b, 0x62eb0d64283f9c76},
+ {0xc45d1df942711d9a, 0x3ba5d0bd324f8394},
+ {0xf5746577930d6500, 0xca8f44ec7ee36479},
+ {0x9968bf6abbe85f20, 0x7e998b13cf4e1ecb},
+ {0xbfc2ef456ae276e8, 0x9e3fedd8c321a67e},
+ {0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101e},
+ {0x95d04aee3b80ece5, 0xbba1f1d158724a12},
+ {0xbb445da9ca61281f, 0x2a8a6e45ae8edc97},
+ {0xea1575143cf97226, 0xf52d09d71a3293bd},
+ {0x924d692ca61be758, 0x593c2626705f9c56},
+ {0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836c},
+ {0xe498f455c38b997a, 0x0b6dfb9c0f956447},
+ {0x8edf98b59a373fec, 0x4724bd4189bd5eac},
+ {0xb2977ee300c50fe7, 0x58edec91ec2cb657},
+ {0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ed},
+ {0x8b865b215899f46c, 0xbd79e0d20082ee74},
+ {0xae67f1e9aec07187, 0xecd8590680a3aa11},
+ {0xda01ee641a708de9, 0xe80e6f4820cc9495},
+ {0x884134fe908658b2, 0x3109058d147fdcdd},
+ {0xaa51823e34a7eede, 0xbd4b46f0599fd415},
+ {0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91a},
+ {0x850fadc09923329e, 0x03e2cf6bc604ddb0},
+ {0xa6539930bf6bff45, 0x84db8346b786151c},
+ {0xcfe87f7cef46ff16, 0xe612641865679a63},
+ {0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07e},
+ {0xa26da3999aef7749, 0xe3be5e330f38f09d},
+ {0xcb090c8001ab551c, 0x5cadf5bfd3072cc5},
+ {0xfdcb4fa002162a63, 0x73d9732fc7c8f7f6},
+ {0x9e9f11c4014dda7e, 0x2867e7fddcdd9afa},
+ {0xc646d63501a1511d, 0xb281e1fd541501b8},
+ {0xf7d88bc24209a565, 0x1f225a7ca91a4226},
+ {0x9ae757596946075f, 0x3375788de9b06958},
+ {0xc1a12d2fc3978937, 0x0052d6b1641c83ae},
+ {0xf209787bb47d6b84, 0xc0678c5dbd23a49a},
+ {0x9745eb4d50ce6332, 0xf840b7ba963646e0},
+ {0xbd176620a501fbff, 0xb650e5a93bc3d898},
+ {0xec5d3fa8ce427aff, 0xa3e51f138ab4cebe},
+ {0x93ba47c980e98cdf, 0xc66f336c36b10137},
+ {0xb8a8d9bbe123f017, 0xb80b0047445d4184},
+ {0xe6d3102ad96cec1d, 0xa60dc059157491e5},
+ {0x9043ea1ac7e41392, 0x87c89837ad68db2f},
+ {0xb454e4a179dd1877, 0x29babe4598c311fb},
+ {0xe16a1dc9d8545e94, 0xf4296dd6fef3d67a},
+ {0x8ce2529e2734bb1d, 0x1899e4a65f58660c},
+ {0xb01ae745b101e9e4, 0x5ec05dcff72e7f8f},
+ {0xdc21a1171d42645d, 0x76707543f4fa1f73},
+ {0x899504ae72497eba, 0x6a06494a791c53a8},
+ {0xabfa45da0edbde69, 0x0487db9d17636892},
+ {0xd6f8d7509292d603, 0x45a9d2845d3c42b6},
+ {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b2},
+ {0xa7f26836f282b732, 0x8e6cac7768d7141e},
+ {0xd1ef0244af2364ff, 0x3207d795430cd926},
+ {0x8335616aed761f1f, 0x7f44e6bd49e807b8},
+ {0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a6},
+ {0xcd036837130890a1, 0x36dba887c37a8c0f},
+ {0x802221226be55a64, 0xc2494954da2c9789},
+ {0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6c},
+ {0xc83553c5c8965d3d, 0x6f92829494e5acc7},
+ {0xfa42a8b73abbf48c, 0xcb772339ba1f17f9},
+ {0x9c69a97284b578d7, 0xff2a760414536efb},
+ {0xc38413cf25e2d70d, 0xfef5138519684aba},
+ {0xf46518c2ef5b8cd1, 0x7eb258665fc25d69},
+ {0x98bf2f79d5993802, 0xef2f773ffbd97a61},
+ {0xbeeefb584aff8603, 0xaafb550ffacfd8fa},
+ {0xeeaaba2e5dbf6784, 0x95ba2a53f983cf38},
+ {0x952ab45cfa97a0b2, 0xdd945a747bf26183},
+ {0xba756174393d88df, 0x94f971119aeef9e4},
+ {0xe912b9d1478ceb17, 0x7a37cd5601aab85d},
+ {0x91abb422ccb812ee, 0xac62e055c10ab33a},
+ {0xb616a12b7fe617aa, 0x577b986b314d6009},
+ {0xe39c49765fdf9d94, 0xed5a7e85fda0b80b},
+ {0x8e41ade9fbebc27d, 0x14588f13be847307},
+ {0xb1d219647ae6b31c, 0x596eb2d8ae258fc8},
+ {0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bb},
+ {0x8aec23d680043bee, 0x25de7bb9480d5854},
+ {0xada72ccc20054ae9, 0xaf561aa79a10ae6a},
+ {0xd910f7ff28069da4, 0x1b2ba1518094da04},
+ {0x87aa9aff79042286, 0x90fb44d2f05d0842},
+ {0xa99541bf57452b28, 0x353a1607ac744a53},
+ {0xd3fa922f2d1675f2, 0x42889b8997915ce8},
+ {0x847c9b5d7c2e09b7, 0x69956135febada11},
+ {0xa59bc234db398c25, 0x43fab9837e699095},
+ {0xcf02b2c21207ef2e, 0x94f967e45e03f4bb},
+ {0x8161afb94b44f57d, 0x1d1be0eebac278f5},
+ {0xa1ba1ba79e1632dc, 0x6462d92a69731732},
+ {0xca28a291859bbf93, 0x7d7b8f7503cfdcfe},
+ {0xfcb2cb35e702af78, 0x5cda735244c3d43e},
+ {0x9defbf01b061adab, 0x3a0888136afa64a7},
+ {0xc56baec21c7a1916, 0x088aaa1845b8fdd0},
+ {0xf6c69a72a3989f5b, 0x8aad549e57273d45},
+ {0x9a3c2087a63f6399, 0x36ac54e2f678864b},
+ {0xc0cb28a98fcf3c7f, 0x84576a1bb416a7dd},
+ {0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d5},
+ {0x969eb7c47859e743, 0x9f644ae5a4b1b325},
+ {0xbc4665b596706114, 0x873d5d9f0dde1fee},
+ {0xeb57ff22fc0c7959, 0xa90cb506d155a7ea},
+ {0x9316ff75dd87cbd8, 0x09a7f12442d588f2},
+ {0xb7dcbf5354e9bece, 0x0c11ed6d538aeb2f},
+ {0xe5d3ef282a242e81, 0x8f1668c8a86da5fa},
+ {0x8fa475791a569d10, 0xf96e017d694487bc},
+ {0xb38d92d760ec4455, 0x37c981dcc395a9ac},
+ {0xe070f78d3927556a, 0x85bbe253f47b1417},
+ {0x8c469ab843b89562, 0x93956d7478ccec8e},
+ {0xaf58416654a6babb, 0x387ac8d1970027b2},
+ {0xdb2e51bfe9d0696a, 0x06997b05fcc0319e},
+ {0x88fcf317f22241e2, 0x441fece3bdf81f03},
+ {0xab3c2fddeeaad25a, 0xd527e81cad7626c3},
+ {0xd60b3bd56a5586f1, 0x8a71e223d8d3b074},
+ {0x85c7056562757456, 0xf6872d5667844e49},
+ {0xa738c6bebb12d16c, 0xb428f8ac016561db},
+ {0xd106f86e69d785c7, 0xe13336d701beba52},
+ {0x82a45b450226b39c, 0xecc0024661173473},
+ {0xa34d721642b06084, 0x27f002d7f95d0190},
+ {0xcc20ce9bd35c78a5, 0x31ec038df7b441f4},
+ {0xff290242c83396ce, 0x7e67047175a15271},
+ {0x9f79a169bd203e41, 0x0f0062c6e984d386},
+ {0xc75809c42c684dd1, 0x52c07b78a3e60868},
+ {0xf92e0c3537826145, 0xa7709a56ccdf8a82},
+ {0x9bbcc7a142b17ccb, 0x88a66076400bb691},
+ {0xc2abf989935ddbfe, 0x6acff893d00ea435},
+ {0xf356f7ebf83552fe, 0x0583f6b8c4124d43},
+ {0x98165af37b2153de, 0xc3727a337a8b704a},
+ {0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5c},
+ {0xeda2ee1c7064130c, 0x1162def06f79df73},
+ {0x9485d4d1c63e8be7, 0x8addcb5645ac2ba8},
+ {0xb9a74a0637ce2ee1, 0x6d953e2bd7173692},
+ {0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0437},
+ {0x910ab1d4db9914a0, 0x1d9c9892400a22a2},
+ {0xb54d5e4a127f59c8, 0x2503beb6d00cab4b},
+ {0xe2a0b5dc971f303a, 0x2e44ae64840fd61d},
+ {0x8da471a9de737e24, 0x5ceaecfed289e5d2},
+ {0xb10d8e1456105dad, 0x7425a83e872c5f47},
+ {0xdd50f1996b947518, 0xd12f124e28f77719},
+ {0x8a5296ffe33cc92f, 0x82bd6b70d99aaa6f},
+ {0xace73cbfdc0bfb7b, 0x636cc64d1001550b},
+ {0xd8210befd30efa5a, 0x3c47f7e05401aa4e},
+ {0x8714a775e3e95c78, 0x65acfaec34810a71},
+ {0xa8d9d1535ce3b396, 0x7f1839a741a14d0d},
+ {0xd31045a8341ca07c, 0x1ede48111209a050},
+ {0x83ea2b892091e44d, 0x934aed0aab460432},
+ {0xa4e4b66b68b65d60, 0xf81da84d5617853f},
+ {0xce1de40642e3f4b9, 0x36251260ab9d668e},
+ {0x80d2ae83e9ce78f3, 0xc1d72b7c6b426019},
+ {0xa1075a24e4421730, 0xb24cf65b8612f81f},
+ {0xc94930ae1d529cfc, 0xdee033f26797b627},
+ {0xfb9b7cd9a4a7443c, 0x169840ef017da3b1},
+ {0x9d412e0806e88aa5, 0x8e1f289560ee864e},
+ {0xc491798a08a2ad4e, 0xf1a6f2bab92a27e2},
+ {0xf5b5d7ec8acb58a2, 0xae10af696774b1db},
+ {0x9991a6f3d6bf1765, 0xacca6da1e0a8ef29},
+ {0xbff610b0cc6edd3f, 0x17fd090a58d32af3},
+ {0xeff394dcff8a948e, 0xddfc4b4cef07f5b0},
+ {0x95f83d0a1fb69cd9, 0x4abdaf101564f98e},
+ {0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f1},
+ {0xea53df5fd18d5513, 0x84c86189216dc5ed},
+ {0x92746b9be2f8552c, 0x32fd3cf5b4e49bb4},
+ {0xb7118682dbb66a77, 0x3fbc8c33221dc2a1},
+ {0xe4d5e82392a40515, 0x0fabaf3feaa5334a},
+ {0x8f05b1163ba6832d, 0x29cb4d87f2a7400e},
+ {0xb2c71d5bca9023f8, 0x743e20e9ef511012},
+ {0xdf78e4b2bd342cf6, 0x914da9246b255416},
+ {0x8bab8eefb6409c1a, 0x1ad089b6c2f7548e},
+ {0xae9672aba3d0c320, 0xa184ac2473b529b1},
+ {0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741e},
+ {0x8865899617fb1871, 0x7e2fa67c7a658892},
+ {0xaa7eebfb9df9de8d, 0xddbb901b98feeab7},
+ {0xd51ea6fa85785631, 0x552a74227f3ea565},
+ {0x8533285c936b35de, 0xd53a88958f87275f},
+ {0xa67ff273b8460356, 0x8a892abaf368f137},
+ {0xd01fef10a657842c, 0x2d2b7569b0432d85},
+ {0x8213f56a67f6b29b, 0x9c3b29620e29fc73},
+ {0xa298f2c501f45f42, 0x8349f3ba91b47b8f},
+ {0xcb3f2f7642717713, 0x241c70a936219a73},
+ {0xfe0efb53d30dd4d7, 0xed238cd383aa0110},
+ {0x9ec95d1463e8a506, 0xf4363804324a40aa},
+ {0xc67bb4597ce2ce48, 0xb143c6053edcd0d5},
+ {0xf81aa16fdc1b81da, 0xdd94b7868e94050a},
+ {0x9b10a4e5e9913128, 0xca7cf2b4191c8326},
+ {0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f0},
+ {0xf24a01a73cf2dccf, 0xbc633b39673c8cec},
+ {0x976e41088617ca01, 0xd5be0503e085d813},
+ {0xbd49d14aa79dbc82, 0x4b2d8644d8a74e18},
+ {0xec9c459d51852ba2, 0xddf8e7d60ed1219e},
+ {0x93e1ab8252f33b45, 0xcabb90e5c942b503},
+ {0xb8da1662e7b00a17, 0x3d6a751f3b936243},
+ {0xe7109bfba19c0c9d, 0x0cc512670a783ad4},
+ {0x906a617d450187e2, 0x27fb2b80668b24c5},
+ {0xb484f9dc9641e9da, 0xb1f9f660802dedf6},
+ {0xe1a63853bbd26451, 0x5e7873f8a0396973},
+ {0x8d07e33455637eb2, 0xdb0b487b6423e1e8},
+ {0xb049dc016abc5e5f, 0x91ce1a9a3d2cda62},
+ {0xdc5c5301c56b75f7, 0x7641a140cc7810fb},
+ {0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9d},
+ {0xac2820d9623bf429, 0x546345fa9fbdcd44},
+ {0xd732290fbacaf133, 0xa97c177947ad4095},
+ {0x867f59a9d4bed6c0, 0x49ed8eabcccc485d},
+ {0xa81f301449ee8c70, 0x5c68f256bfff5a74},
+ {0xd226fc195c6a2f8c, 0x73832eec6fff3111},
+ {0x83585d8fd9c25db7, 0xc831fd53c5ff7eab},
+ {0xa42e74f3d032f525, 0xba3e7ca8b77f5e55},
+ {0xcd3a1230c43fb26f, 0x28ce1bd2e55f35eb},
+ {0x80444b5e7aa7cf85, 0x7980d163cf5b81b3},
+ {0xa0555e361951c366, 0xd7e105bcc332621f},
+ {0xc86ab5c39fa63440, 0x8dd9472bf3fefaa7},
+ {0xfa856334878fc150, 0xb14f98f6f0feb951},
+ {0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d3},
+ {0xc3b8358109e84f07, 0x0a862f80ec4700c8},
+ {0xf4a642e14c6262c8, 0xcd27bb612758c0fa},
+ {0x98e7e9cccfbd7dbd, 0x8038d51cb897789c},
+ {0xbf21e44003acdd2c, 0xe0470a63e6bd56c3},
+ {0xeeea5d5004981478, 0x1858ccfce06cac74},
+ {0x95527a5202df0ccb, 0x0f37801e0c43ebc8},
+ {0xbaa718e68396cffd, 0xd30560258f54e6ba},
+ {0xe950df20247c83fd, 0x47c6b82ef32a2069},
+ {0x91d28b7416cdd27e, 0x4cdc331d57fa5441},
+ {0xb6472e511c81471d, 0xe0133fe4adf8e952},
+ {0xe3d8f9e563a198e5, 0x58180fddd97723a6},
+ {0x8e679c2f5e44ff8f, 0x570f09eaa7ea7648},
+ {0xb201833b35d63f73, 0x2cd2cc6551e513da},
+ {0xde81e40a034bcf4f, 0xf8077f7ea65e58d1},
+ {0x8b112e86420f6191, 0xfb04afaf27faf782},
+ {0xadd57a27d29339f6, 0x79c5db9af1f9b563},
+ {0xd94ad8b1c7380874, 0x18375281ae7822bc},
+ {0x87cec76f1c830548, 0x8f2293910d0b15b5},
+ {0xa9c2794ae3a3c69a, 0xb2eb3875504ddb22},
+ {0xd433179d9c8cb841, 0x5fa60692a46151eb},
+ {0x849feec281d7f328, 0xdbc7c41ba6bcd333},
+ {0xa5c7ea73224deff3, 0x12b9b522906c0800},
+ {0xcf39e50feae16bef, 0xd768226b34870a00},
+ {0x81842f29f2cce375, 0xe6a1158300d46640},
+ {0xa1e53af46f801c53, 0x60495ae3c1097fd0},
+ {0xca5e89b18b602368, 0x385bb19cb14bdfc4},
+ {0xfcf62c1dee382c42, 0x46729e03dd9ed7b5},
+ {0x9e19db92b4e31ba9, 0x6c07a2c26a8346d1},
+ {0xc5a05277621be293, 0xc7098b7305241885},
+ {0xf70867153aa2db38, 0xb8cbee4fc66d1ea7}
+#else
+ {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b},
+ {0xce5d73ff402d98e3, 0xfb0a3d212dc81290},
+ {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f},
+ {0x86a8d39ef77164bc, 0xae5dff9c02033198},
+ {0xd98ddaee19068c76, 0x3badd624dd9b0958},
+ {0xafbd2350644eeacf, 0xe5d1929ef90898fb},
+ {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2},
+ {0xe55990879ddcaabd, 0xcc420a6a101d0516},
+ {0xb94470938fa89bce, 0xf808e40e8d5b3e6a},
+ {0x95a8637627989aad, 0xdde7001379a44aa9},
+ {0xf1c90080baf72cb1, 0x5324c68b12dd6339},
+ {0xc350000000000000, 0x0000000000000000},
+ {0x9dc5ada82b70b59d, 0xf020000000000000},
+ {0xfee50b7025c36a08, 0x02f236d04753d5b4},
+ {0xcde6fd5e09abcf26, 0xed4c0226b55e6f86},
+ {0xa6539930bf6bff45, 0x84db8346b786151c},
+ {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b2},
+ {0xd910f7ff28069da4, 0x1b2ba1518094da04},
+ {0xaf58416654a6babb, 0x387ac8d1970027b2},
+ {0x8da471a9de737e24, 0x5ceaecfed289e5d2},
+ {0xe4d5e82392a40515, 0x0fabaf3feaa5334a},
+ {0xb8da1662e7b00a17, 0x3d6a751f3b936243},
+ {0x95527a5202df0ccb, 0x0f37801e0c43ebc8}
+#endif
+};
+
+#if !FMT_USE_FULL_CACHE_DRAGONBOX
+template <typename T>
+const uint64_t basic_data<T>::powers_of_5_64[] = {
+ 0x0000000000000001, 0x0000000000000005, 0x0000000000000019,
+ 0x000000000000007d, 0x0000000000000271, 0x0000000000000c35,
+ 0x0000000000003d09, 0x000000000001312d, 0x000000000005f5e1,
+ 0x00000000001dcd65, 0x00000000009502f9, 0x0000000002e90edd,
+ 0x000000000e8d4a51, 0x0000000048c27395, 0x000000016bcc41e9,
+ 0x000000071afd498d, 0x0000002386f26fc1, 0x000000b1a2bc2ec5,
+ 0x000003782dace9d9, 0x00001158e460913d, 0x000056bc75e2d631,
+ 0x0001b1ae4d6e2ef5, 0x000878678326eac9, 0x002a5a058fc295ed,
+ 0x00d3c21bcecceda1, 0x0422ca8b0a00a425, 0x14adf4b7320334b9};
+
+template <typename T>
+const uint32_t basic_data<T>::dragonbox_pow10_recovery_errors[] = {
+ 0x50001400, 0x54044100, 0x54014555, 0x55954415, 0x54115555, 0x00000001,
+ 0x50000000, 0x00104000, 0x54010004, 0x05004001, 0x55555544, 0x41545555,
+ 0x54040551, 0x15445545, 0x51555514, 0x10000015, 0x00101100, 0x01100015,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04450514, 0x45414110,
+ 0x55555145, 0x50544050, 0x15040155, 0x11054140, 0x50111514, 0x11451454,
+ 0x00400541, 0x00000000, 0x55555450, 0x10056551, 0x10054011, 0x55551014,
+ 0x69514555, 0x05151109, 0x00155555};
+#endif
+
template <typename T>
const char basic_data<T>::foreground_color[] = "\x1b[38;2;";
template <typename T>
private:
using significand_type = uint64_t;
+ template <typename Float>
+ using is_supported_float = bool_constant<sizeof(Float) == sizeof(uint64_t) ||
+ sizeof(Float) == sizeof(uint32_t)>;
+
public:
significand_type f;
int e;
template <typename Double> explicit fp(Double d) { assign(d); }
// Assigns d to this and return true iff predecessor is closer than successor.
+ // Enabled for Float types the size of uint32_t or uint64_t (see
+ // is_supported_float). Afterwards f holds the significand, including the
+ // implicit bit for normal values, and e the matching binary exponent.
- template <typename Double, FMT_ENABLE_IF(sizeof(Double) == sizeof(uint64_t))>
- bool assign(Double d) {
- // Assume double is in the format [sign][exponent][significand].
- using limits = std::numeric_limits<Double>;
+ template <typename Float, FMT_ENABLE_IF(is_supported_float<Float>::value)>
+ bool assign(Float d) {
+ // Assume float is in the format [sign][exponent][significand].
+ using limits = std::numeric_limits<Float>;
+ // digits counts the implicit bit, so the stored significand is one shorter.
+ const int float_significand_size = limits::digits - 1;
const int exponent_size =
- bits<Double>::value - double_significand_size - 1; // -1 for sign
- const uint64_t significand_mask = implicit_bit - 1;
+ bits<Float>::value - float_significand_size - 1; // -1 for sign
+ const uint64_t float_implicit_bit = 1ULL << float_significand_size;
+ const uint64_t significand_mask = float_implicit_bit - 1;
- const uint64_t exponent_mask = (~0ULL >> 1) & ~significand_mask;
+ // Cover exactly the exponent field. Clearing only bit 63 would leave the
+ // sign bit (bit 31) of a 32-bit float inside the mask and corrupt biased_e
+ // for negative inputs; for 64-bit floats the value is unchanged.
+ const uint64_t exponent_mask = ((1ULL << exponent_size) - 1)
+ << float_significand_size;
const int exponent_bias = (1 << exponent_size) - limits::max_exponent - 1;
- auto u = bit_cast<uint64_t>(d);
+ // Reinterpret d through an unsigned integer of the same width as Float.
+ constexpr bool is_double = sizeof(Float) == sizeof(uint64_t);
+ auto u = bit_cast<conditional_t<is_double, uint64_t, uint32_t>>(d);
f = u & significand_mask;
int biased_e =
- static_cast<int>((u & exponent_mask) >> double_significand_size);
+ static_cast<int>((u & exponent_mask) >> float_significand_size);
// Predecessor is closer if d is a normalized power of 2 (f == 0) other than
// the smallest normalized number (biased_e > 1).
bool is_predecessor_closer = f == 0 && biased_e > 1;
if (biased_e != 0)
- f += implicit_bit;
+ f += float_implicit_bit;
else
biased_e = 1; // Subnormals use biased exponent 1 (min exponent).
- e = biased_e - exponent_bias - double_significand_size;
+ e = biased_e - exponent_bias - float_significand_size;
return is_predecessor_closer;
}
- template <typename Double, FMT_ENABLE_IF(sizeof(Double) != sizeof(uint64_t))>
- bool assign(Double) {
+ // Overload for Float types that is_supported_float rejects: resets this to a
+ // default-constructed fp and reports that the predecessor is not closer.
+ template <typename Float, FMT_ENABLE_IF(!is_supported_float<Float>::value)>
+ bool assign(Float) {
*this = fp();
return false;
}
-
- // Assigns d to this together with computing lower and upper boundaries,
- // where a boundary is a value half way between the number and its predecessor
- // (lower) or successor (upper). The upper boundary is normalized and lower
- // has the same exponent but may be not normalized.
- template <typename Double> boundaries assign_with_boundaries(Double d) {
- bool is_lower_closer = assign(d);
- fp lower =
- is_lower_closer ? fp((f << 2) - 1, e - 2) : fp((f << 1) - 1, e - 1);
- // 1 in normalize accounts for the exponent shift above.
- fp upper = normalize<1>(fp((f << 1) + 1, e - 1));
- lower.f <<= lower.e - upper.e;
- return boundaries{lower.f, upper.f};
- }
-
- template <typename Double> boundaries assign_float_with_boundaries(Double d) {
- assign(d);
- constexpr int min_normal_e = std::numeric_limits<float>::min_exponent -
- std::numeric_limits<double>::digits;
- significand_type half_ulp = 1 << (std::numeric_limits<double>::digits -
- std::numeric_limits<float>::digits - 1);
- if (min_normal_e > e) half_ulp <<= min_normal_e - e;
- fp upper = normalize<0>(fp(f + half_ulp, e));
- fp lower = fp(
- f - (half_ulp >> ((f == implicit_bit && e > min_normal_e) ? 1 : 0)), e);
- lower.f <<= lower.e - upper.e;
- return boundaries{lower.f, upper.f};
- }
};
// Normalizes the value converted from double and multiplied by (1 << SHIFT).
// Returns a cached power of 10 `c_k = c_k.f * pow(2, c_k.e)` such that its
// (binary) exponent satisfies `min_exponent <= c_k.e <= min_exponent + 28`.
+// On return pow10_exponent holds the decimal exponent of that cached power.
inline fp get_cached_power(int min_exponent, int& pow10_exponent) {
- const int64_t one_over_log2_10 = 0x4d104d42; // round(pow(2, 32) / log2(10))
+ // The upper 32 bits of data::log10_2_significand act as a fixed-point
+ // multiplier scaled by pow(2, shift).
+ const int shift = 32;
+ const auto significand = static_cast<int64_t>(data::log10_2_significand);
int index = static_cast<int>(
- ((min_exponent + fp::significand_size - 1) * one_over_log2_10 +
- ((int64_t(1) << 32) - 1)) // ceil
- >> 32 // arithmetic shift
+ ((min_exponent + fp::significand_size - 1) * (significand >> shift) +
+ ((int64_t(1) << shift) - 1)) // ceil
+ // NOTE(review): the literal 32 below is the same quantity as `shift`.
+ >> 32 // arithmetic shift
);
// Decimal exponent of the first (smallest) cached power of 10.
const int first_dec_exp = -348;
const int dec_exp_step = 8;
+ // Round up to the next table slot; slots are dec_exp_step decimal exponents
+ // apart, starting at first_dec_exp.
index = (index - first_dec_exp - 1) / dec_exp_step + 1;
pow10_exponent = first_dec_exp + index * dec_exp_step;
- return {data::pow10_significands[index], data::pow10_exponents[index]};
+ return {data::grisu_pow10_significands[index],
+ data::grisu_pow10_exponents[index]};
}
// A simple accumulator to hold the sums of terms in bigint::square if uint128_t
FMT_ASSERT(compare(*this, other) >= 0, "");
bigit borrow = 0;
int i = other.exp_ - exp_;
- for (size_t j = 0, n = other.bigits_.size(); j != n; ++i, ++j) {
+ for (size_t j = 0, n = other.bigits_.size(); j != n; ++i, ++j)
subtract_bigits(i, other.bigits_[j], borrow);
- }
while (borrow > 0) subtract_bigits(i, 0, borrow);
remove_leading_zeros();
}
exp_ *= 2;
}
+ // If this bigint has a bigger exponent than other, adds trailing zeros to
+ // make exponents equal. This simplifies some operations such as subtraction.
+ // Does nothing when exp_ is already less than or equal to other.exp_.
+ void align(const bigint& other) {
+ int exp_difference = exp_ - other.exp_;
+ if (exp_difference <= 0) return;
+ int num_bigits = static_cast<int>(bigits_.size());
+ // Grow by exp_difference bigits and move the existing ones up, walking from
+ // the highest index down so no bigit is overwritten before it is copied.
+ bigits_.resize(to_unsigned(num_bigits + exp_difference));
+ for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j)
+ bigits_[j] = bigits_[i];
+ // Zero the vacated low-index bigits and lower the exponent to match.
+ std::uninitialized_fill_n(bigits_.data(), exp_difference, 0);
+ exp_ -= exp_difference;
+ }
+
// Divides this bignum by divisor, assigning the remainder to this and
// returning the quotient.
int divmod_assign(const bigint& divisor) {
FMT_ASSERT(this != &divisor, "");
if (compare(*this, divisor) < 0) return 0;
- int num_bigits = static_cast<int>(bigits_.size());
FMT_ASSERT(divisor.bigits_[divisor.bigits_.size() - 1u] != 0, "");
- int exp_difference = exp_ - divisor.exp_;
- if (exp_difference > 0) {
- // Align bigints by adding trailing zeros to simplify subtraction.
- bigits_.resize(to_unsigned(num_bigits + exp_difference));
- for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j)
- bigits_[j] = bigits_[i];
- std::uninitialized_fill_n(bigits_.data(), exp_difference, 0);
- exp_ -= exp_difference;
- }
+ align(divisor);
int quotient = 0;
do {
subtract_aligned(divisor);
};
}
-// A version of count_digits optimized for grisu_gen_digits.
-inline int grisu_count_digits(uint32_t n) {
- if (n < 10) return 1;
- if (n < 100) return 2;
- if (n < 1000) return 3;
- if (n < 10000) return 4;
- if (n < 100000) return 5;
- if (n < 1000000) return 6;
- if (n < 10000000) return 7;
- if (n < 100000000) return 8;
- if (n < 1000000000) return 9;
- return 10;
-}
-
// Generates output using the Grisu digit-gen algorithm.
// error: the size of the region (lower, upper) outside of which numbers
// definitely do not round to value (Delta in Grisu3).
FMT_ASSERT(integral == value.f >> -one.e, "");
// The fractional part of scaled value (p2 in Grisu) c = value % one.
uint64_t fractional = value.f & (one.f - 1);
- exp = grisu_count_digits(integral); // kappa in Grisu.
+ exp = count_digits(integral); // kappa in Grisu.
// Divide by 10 to prevent overflow.
auto result = handler.on_start(data::powers_of_10_64[exp - 1] << -one.e,
value.f / 10, error * 10, exp);
FMT_ASSERT(false, "invalid number of digits");
}
--exp;
- uint64_t remainder =
- (static_cast<uint64_t>(integral) << -one.e) + fractional;
+ auto remainder = (static_cast<uint64_t>(integral) << -one.e) + fractional;
result = handler.on_digit(static_cast<char>('0' + digit),
data::powers_of_10_64[exp] << -one.e, remainder,
error, exp, true);
for (;;) {
fractional *= 10;
error *= 10;
- char digit =
- static_cast<char>('0' + static_cast<char>(fractional >> -one.e));
+ char digit = static_cast<char>('0' + (fractional >> -one.e));
fractional &= one.f - 1;
--exp;
result = handler.on_digit(digit, one.f, fractional, error, exp, false);
uint64_t error, int, bool integral) {
FMT_ASSERT(remainder < divisor, "");
buf[size++] = digit;
+ if (!integral && error >= remainder) return digits::error;
if (size < precision) return digits::more;
if (!integral) {
// Check if error * 2 < divisor with overflow prevention.
}
if (buf[0] > '9') {
buf[0] = '1';
- buf[size++] = '0';
+ if (fixed)
+ buf[size++] = '0';
+ else
+ ++exp10;
}
return digits::done;
}
};
-// The shortest representation digit handler.
-struct grisu_shortest_handler {
- char* buf;
- int size;
- // Distance between scaled value and upper bound (wp_W in Grisu3).
- uint64_t diff;
+// Implementation of Dragonbox algorithm: https://github.com/jk-jeon/dragonbox.
+namespace dragonbox {
+// Computes 128-bit result of multiplication of two 64-bit unsigned integers.
+// Uses the native 128-bit type when FMT_USE_INT128 is set, the _umul128
+// intrinsic on 64-bit MSVC, and a portable multiplication on 32-bit halves
+// otherwise.
+FMT_SAFEBUFFERS inline uint128_wrapper umul128(uint64_t x,
+ uint64_t y) FMT_NOEXCEPT {
+#if FMT_USE_INT128
+ return static_cast<uint128_t>(x) * static_cast<uint128_t>(y);
+#elif defined(_MSC_VER) && defined(_M_X64)
+ uint128_wrapper result;
+ result.low_ = _umul128(x, y, &result.high_);
+ return result;
+#else
+ const uint64_t mask = (uint64_t(1) << 32) - uint64_t(1);
+
+ // Split the operands into 32-bit halves: x = a * 2^32 + b, y = c * 2^32 + d.
+ uint64_t a = x >> 32;
+ uint64_t b = x & mask;
+ uint64_t c = y >> 32;
+ uint64_t d = y & mask;
+
+ // The four partial products; each fits in 64 bits.
+ uint64_t ac = a * c;
+ uint64_t bc = b * c;
+ uint64_t ad = a * d;
+ uint64_t bd = b * d;
+
+ // Sum of everything that lands in bits 32..63 of the product; its upper
+ // half is the carry into the high word.
+ uint64_t intermediate = (bd >> 32) + (ad & mask) + (bc & mask);
+
+ // First member is the high 64 bits, second the low 64 bits.
+ return {ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32),
+ (intermediate << 32) + (bd & mask)};
+#endif
+}
+
+// Computes upper 64 bits of multiplication of two 64-bit unsigned integers.
+// Uses the native 128-bit type when FMT_USE_INT128 is set, the __umulh
+// intrinsic on 64-bit MSVC, and the high word of umul128 otherwise.
+FMT_SAFEBUFFERS inline uint64_t umul128_upper64(uint64_t x,
+ uint64_t y) FMT_NOEXCEPT {
+#if FMT_USE_INT128
+ auto p = static_cast<uint128_t>(x) * static_cast<uint128_t>(y);
+ return static_cast<uint64_t>(p >> 64);
+#elif defined(_MSC_VER) && defined(_M_X64)
+ return __umulh(x, y);
+#else
+ return umul128(x, y).high();
+#endif
+}
+
+// Computes upper 64 bits of multiplication of a 64-bit unsigned integer and a
+// 128-bit unsigned integer.
+FMT_SAFEBUFFERS inline uint64_t umul192_upper64(uint64_t x, uint128_wrapper y)
+ FMT_NOEXCEPT {
+ // Full 128-bit product of x with the high word of y.
+ uint128_wrapper g0 = umul128(x, y.high());
+ // Add the part of x * y.low() that overlaps it (its upper 64 bits).
+ g0 += umul128_upper64(x, y.low());
+ return g0.high();
+}
+
+// Computes upper 32 bits of multiplication of a 32-bit unsigned integer and a
+// 64-bit unsigned integer.
+// x is widened to 64 bits, so the 96-bit product's top 32 bits are the upper
+// 64 bits of the 128-bit product, truncated to uint32_t.
+inline uint32_t umul96_upper32(uint32_t x, uint64_t y) FMT_NOEXCEPT {
+ return static_cast<uint32_t>(umul128_upper64(x, y));
+}
+
+// Computes middle 64 bits of multiplication of a 64-bit unsigned integer and a
+// 128-bit unsigned integer.
+FMT_SAFEBUFFERS inline uint64_t umul192_middle64(uint64_t x, uint128_wrapper y)
+ FMT_NOEXCEPT {
+ // Low 64 bits of x * y.high(); the multiplication wraps modulo 2^64.
+ uint64_t g01 = x * y.high();
+ // Upper 64 bits of x * y.low().
+ uint64_t g10 = umul128_upper64(x, y.low());
+ // The sum also wraps modulo 2^64; any carry out is discarded.
+ return g01 + g10;
+}
+
+// Computes lower 64 bits of multiplication of a 32-bit unsigned integer and a
+// 64-bit unsigned integer.
+inline uint64_t umul96_lower64(uint32_t x, uint64_t y) FMT_NOEXCEPT {
+ // x is converted to uint64_t; the unsigned product wraps modulo 2^64.
+ return x * y;
+}
+
+// Computes floor(log10(pow(2, e))) for e in [-1700, 1700] using the method from
+// https://fmt.dev/papers/Grisu-Exact.pdf#page=5, section 3.4.
+// The top `shift` bits of data::log10_2_significand are used as a fixed-point
+// multiplier and the product is shifted back down by `shift`.
+inline int floor_log10_pow2(int e) FMT_NOEXCEPT {
+ FMT_ASSERT(e <= 1700 && e >= -1700, "too large exponent");
+ const int shift = 22;
+ return (e * static_cast<int>(data::log10_2_significand >> (64 - shift))) >>
+ shift;
+}
+
+// Various fast log computations.
+// Computes floor(log2(pow(10, e))) for e in [-1233, 1233] by multiplying e
+// with a fixed-point constant built from the integer part (3) and the top
+// shift_amount fractional bits of log2(10), then shifting back down.
+inline int floor_log2_pow10(int e) FMT_NOEXCEPT {
+ FMT_ASSERT(e <= 1233 && e >= -1233, "too large exponent");
+ const uint64_t log2_10_integer_part = 3;
+ const uint64_t log2_10_fractional_digits = 0x5269e12f346e2bf9;
+ const int shift_amount = 19;
+ return (e * static_cast<int>(
+ (log2_10_integer_part << shift_amount) |
+ (log2_10_fractional_digits >> (64 - shift_amount)))) >>
+ shift_amount;
+}
+// Computes floor(log10(pow(2, e)) - log10(4 / 3)) for e in [-1700, 1700], as
+// the name indicates: e times the top shift_amount bits of
+// data::log10_2_significand, minus the top shift_amount bits of the
+// log10(4 / 3) constant, shifted back down by shift_amount.
+inline int floor_log10_pow2_minus_log10_4_over_3(int e) FMT_NOEXCEPT {
+ FMT_ASSERT(e <= 1700 && e >= -1700, "too large exponent");
+ const uint64_t log10_4_over_3_fractional_digits = 0x1ffbfc2bbc780375;
+ const int shift_amount = 22;
+ return (e * static_cast<int>(data::log10_2_significand >>
+ (64 - shift_amount)) -
+ static_cast<int>(log10_4_over_3_fractional_digits >>
+ (64 - shift_amount))) >>
+ shift_amount;
+}
+
+// Returns true iff x is divisible by pow(2, exp).
+// Requires exp >= 1 and x != 0 (both asserted). With a count-trailing-zeros
+// builtin the test is a single comparison; the fallback checks that shifting
+// the low exp bits out and back in leaves x unchanged.
+inline bool divisible_by_power_of_2(uint32_t x, int exp) FMT_NOEXCEPT {
+ FMT_ASSERT(exp >= 1, "");
+ FMT_ASSERT(x != 0, "");
+#ifdef FMT_BUILTIN_CTZ
+ return FMT_BUILTIN_CTZ(x) >= exp;
+#else
+ // The exp < num_bits guard keeps the shift count below the type's width.
+ return exp < num_bits<uint32_t>() && x == ((x >> exp) << exp);
+#endif
+}
+// 64-bit overload of the test above; same preconditions.
+inline bool divisible_by_power_of_2(uint64_t x, int exp) FMT_NOEXCEPT {
+ FMT_ASSERT(exp >= 1, "");
+ FMT_ASSERT(x != 0, "");
+#ifdef FMT_BUILTIN_CTZLL
+ return FMT_BUILTIN_CTZLL(x) >= exp;
+#else
+ // The exp < num_bits guard keeps the shift count below the type's width.
+ return exp < num_bits<uint64_t>() && x == ((x >> exp) << exp);
+#endif
+}
+
+// Returns true iff x is divisible by pow(5, exp).
+// Multiplies x (wrapping) by the table's mod_inv entry for exp and compares
+// the result with that entry's max_quotient. exp must not exceed 10 here.
+inline bool divisible_by_power_of_5(uint32_t x, int exp) FMT_NOEXCEPT {
+ FMT_ASSERT(exp <= 10, "too large exponent");
+ return x * data::divtest_table_for_pow5_32[exp].mod_inv <=
+ data::divtest_table_for_pow5_32[exp].max_quotient;
+}
+// 64-bit overload using the 64-bit table; exp must not exceed 23.
+inline bool divisible_by_power_of_5(uint64_t x, int exp) FMT_NOEXCEPT {
+ FMT_ASSERT(exp <= 23, "too large exponent");
+ return x * data::divtest_table_for_pow5_64[exp].mod_inv <=
+ data::divtest_table_for_pow5_64[exp].max_quotient;
+}
+
+// Replaces n by floor(n / pow(5, N)) returning true if and only if n is
+// divisible by pow(5, N).
+// Precondition: n <= 2 * pow(5, N + 1).
+// The infos table has two rows, selected by N - 1, so N must be 1 or 2.
+template <int N>
+bool check_divisibility_and_divide_by_pow5(uint32_t& n) FMT_NOEXCEPT {
+ static constexpr struct {
+ uint32_t magic_number;
+ int bits_for_comparison;
+ uint32_t threshold;
+ int shift_amount;
+ } infos[] = {{0xcccd, 16, 0x3333, 18}, {0xa429, 8, 0x0a, 20}};
+ constexpr auto info = infos[N - 1];
+ // One multiplication serves both results: the low bits_for_comparison bits
+ // of the product decide divisibility, the bits above shift_amount are the
+ // quotient.
+ n *= info.magic_number;
+ const uint32_t comparison_mask = (1u << info.bits_for_comparison) - 1;
+ bool result = (n & comparison_mask) <= info.threshold;
+ n >>= info.shift_amount;
+ return result;
+}
+
+// Computes floor(n / pow(10, N)) for small n and N.
+// Precondition: n <= pow(10, N + 1).
+// The infos table has two rows, selected by N - 1, so N must be 1 or 2. The
+// division is done as a multiplication by magic_number followed by a right
+// shift.
+template <int N> uint32_t small_division_by_pow10(uint32_t n) FMT_NOEXCEPT {
+ static constexpr struct {
+ uint32_t magic_number;
+ int shift_amount;
+ uint32_t divisor_times_10;
+ } infos[] = {{0xcccd, 19, 100}, {0xa3d8, 22, 1000}};
+ constexpr auto info = infos[N - 1];
+ FMT_ASSERT(n <= info.divisor_times_10, "n is too large");
+ return n * info.magic_number >> info.shift_amount;
+}
+
+// Computes floor(n / 10^(kappa + 1)) (float)
+// Plain division by float_info<float>::big_divisor.
+inline uint32_t divide_by_10_to_kappa_plus_1(uint32_t n) FMT_NOEXCEPT {
+ return n / float_info<float>::big_divisor;
+}
+// Computes floor(n / 10^(kappa + 1)) (double)
+// Uses a multiply-high by a magic constant followed by a right shift instead
+// of a 64-bit division.
+// NOTE(review): the constant presumably encodes the reciprocal of
+// float_info<double>::big_divisor; confirm against that definition.
+inline uint64_t divide_by_10_to_kappa_plus_1(uint64_t n) FMT_NOEXCEPT {
+ return umul128_upper64(n, 0x83126e978d4fdf3c) >> 9;
+}
+
+// Various subroutines using pow10 cache
+template <class T> struct cache_accessor;
+
+// Cache access and fixed-point helpers for float. A cache entry is a single
+// 64-bit significand of a power of 10.
+template <> struct cache_accessor<float> {
+ using carrier_uint = float_info<float>::carrier_uint;
+ using cache_entry_type = uint64_t;
+
+ // Returns the cached significand for pow(10, k); k must lie in
+ // [min_k, max_k] and the table is indexed by k - min_k.
+ static uint64_t get_cached_power(int k) FMT_NOEXCEPT {
+ FMT_ASSERT(k >= float_info<float>::min_k && k <= float_info<float>::max_k,
+ "k is out of range");
+ return data::dragonbox_pow10_significands_64[k - float_info<float>::min_k];
+ }
+
+ // Returns the upper 32 bits of the 96-bit product u * cache.
+ static carrier_uint compute_mul(carrier_uint u,
+ const cache_entry_type& cache) FMT_NOEXCEPT {
+ return umul96_upper32(u, cache);
+ }
+
+ // Returns cache shifted right by 64 - 1 - beta_minus_1, truncated to 32 bits.
+ static uint32_t compute_delta(const cache_entry_type& cache,
+ int beta_minus_1) FMT_NOEXCEPT {
+ return static_cast<uint32_t>(cache >> (64 - 1 - beta_minus_1));
+ }
+
+ // Returns bit 64 - beta_minus_1 of the low 64 bits of two_f * cache.
+ // beta_minus_1 must be in [1, 63].
+ static bool compute_mul_parity(carrier_uint two_f,
+ const cache_entry_type& cache,
+ int beta_minus_1) FMT_NOEXCEPT {
+ FMT_ASSERT(beta_minus_1 >= 1, "");
+ FMT_ASSERT(beta_minus_1 < 64, "");
+
+ return ((umul96_lower64(two_f, cache) >> (64 - beta_minus_1)) & 1) != 0;
+ }
+
+ // Left endpoint for the shorter interval case: cache reduced by
+ // cache >> (significand_bits + 2), then scaled down to an integer.
+ static carrier_uint compute_left_endpoint_for_shorter_interval_case(
+ const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT {
+ return static_cast<carrier_uint>(
+ (cache - (cache >> (float_info<float>::significand_bits + 2))) >>
+ (64 - float_info<float>::significand_bits - 1 - beta_minus_1));
+ }
+
+ // Right endpoint for the shorter interval case: cache increased by
+ // cache >> (significand_bits + 1), then scaled down to an integer.
+ static carrier_uint compute_right_endpoint_for_shorter_interval_case(
+ const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT {
+ return static_cast<carrier_uint>(
+ (cache + (cache >> (float_info<float>::significand_bits + 1))) >>
+ (64 - float_info<float>::significand_bits - 1 - beta_minus_1));
+ }
+
+ // Keeps one extra bit below the integer position, adds 1 and halves, which
+ // rounds the scaled cache value to the nearest integer (halves go up).
+ static carrier_uint compute_round_up_for_shorter_interval_case(
+ const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT {
+ return (static_cast<carrier_uint>(
+ cache >>
+ (64 - float_info<float>::significand_bits - 2 - beta_minus_1)) +
+ 1) /
+ 2;
+ }
+};
+
+// Cache access and fixed-point helpers for double. A cache entry is a 128-bit
+// significand of a power of 10, held in a uint128_wrapper.
+template <> struct cache_accessor<double> {
+ using carrier_uint = float_info<double>::carrier_uint;
+ using cache_entry_type = uint128_wrapper;
+
+ // Returns the cached significand for pow(10, k); k must lie in
+ // [min_k, max_k]. With FMT_USE_FULL_CACHE_DRAGONBOX the table holds every
+ // k; otherwise only every compression_ratio-th entry is stored and the rest
+ // are rebuilt from the nearest stored entry below k.
+ static uint128_wrapper get_cached_power(int k) FMT_NOEXCEPT {
+ FMT_ASSERT(k >= float_info<double>::min_k && k <= float_info<double>::max_k,
+ "k is out of range");
+
+#if FMT_USE_FULL_CACHE_DRAGONBOX
+ return data::dragonbox_pow10_significands_128[k -
+ float_info<double>::min_k];
+#else
+ static const int compression_ratio = 27;
+
+ // Compute base index.
+ // kb is the exponent of the stored entry, offset the distance from it to k.
+ int cache_index = (k - float_info<double>::min_k) / compression_ratio;
+ int kb = cache_index * compression_ratio + float_info<double>::min_k;
+ int offset = k - kb;
+
+ // Get base cache.
+ uint128_wrapper base_cache =
+ data::dragonbox_pow10_significands_128[cache_index];
+ if (offset == 0) return base_cache;
+
+ // Compute the required amount of bit-shift.
+ int alpha = floor_log2_pow10(kb + offset) - floor_log2_pow10(kb) - offset;
+ FMT_ASSERT(alpha > 0 && alpha < 64, "shifting error detected");
+
+ // Try to recover the real cache.
+ // Multiply the 128-bit base entry by pow(5, offset) in two 64-bit halves.
+ uint64_t pow5 = data::powers_of_5_64[offset];
+ uint128_wrapper recovered_cache = umul128(base_cache.high(), pow5);
+ uint128_wrapper middle_low =
+ umul128(base_cache.low() - (kb < 0 ? 1u : 0u), pow5);
+
+ recovered_cache += middle_low.high();
+
+ // Shift the product right by alpha, carrying bits across the 64-bit words.
+ uint64_t high_to_middle = recovered_cache.high() << (64 - alpha);
+ uint64_t middle_to_low = recovered_cache.low() << (64 - alpha);
+
+ recovered_cache =
+ uint128_wrapper{(recovered_cache.low() >> alpha) | high_to_middle,
+ ((middle_low.low() >> alpha) | middle_to_low)};
+
+ if (kb < 0) recovered_cache += 1;
+
+ // Get error.
+ // Sixteen 2-bit correction terms are packed into each table word.
+ int error_idx = (k - float_info<double>::min_k) / 16;
+ uint32_t error = (data::dragonbox_pow10_recovery_errors[error_idx] >>
+ ((k - float_info<double>::min_k) % 16) * 2) &
+ 0x3;
+
+ // Add the error back.
+ // The assert checks that the addition does not wrap the low word.
+ FMT_ASSERT(recovered_cache.low() + error >= recovered_cache.low(), "");
+ return {recovered_cache.high(), recovered_cache.low() + error};
+#endif
+ }
+
+ // Returns the upper 64 bits of the 192-bit product u * cache.
+ static carrier_uint compute_mul(carrier_uint u,
+ const cache_entry_type& cache) FMT_NOEXCEPT {
+ return umul192_upper64(u, cache);
+ }
+
+ // Returns cache.high() shifted right by 64 - 1 - beta_minus_1, truncated to
+ // 32 bits.
+ static uint32_t compute_delta(cache_entry_type const& cache,
+ int beta_minus_1) FMT_NOEXCEPT {
+ return static_cast<uint32_t>(cache.high() >> (64 - 1 - beta_minus_1));
+ }
+
+ // Returns bit 64 - beta_minus_1 of the middle 64 bits of two_f * cache.
+ // beta_minus_1 must be in [1, 63].
+ static bool compute_mul_parity(carrier_uint two_f,
+ const cache_entry_type& cache,
+ int beta_minus_1) FMT_NOEXCEPT {
+ FMT_ASSERT(beta_minus_1 >= 1, "");
+ FMT_ASSERT(beta_minus_1 < 64, "");
- digits::result on_start(uint64_t, uint64_t, uint64_t, int&) {
- return digits::more;
+ return ((umul192_middle64(two_f, cache) >> (64 - beta_minus_1)) & 1) != 0;
}
- // Decrement the generated number approaching value from above.
- void round(uint64_t d, uint64_t divisor, uint64_t& remainder,
- uint64_t error) {
- while (
- remainder < d && error - remainder >= divisor &&
- (remainder + divisor < d || d - remainder >= remainder + divisor - d)) {
- --buf[size - 1];
- remainder += divisor;
+ // Left endpoint for the shorter interval case: cache.high() reduced by
+ // cache.high() >> (significand_bits + 2), then scaled down to an integer.
+ static carrier_uint compute_left_endpoint_for_shorter_interval_case(
+ const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT {
+ return (cache.high() -
+ (cache.high() >> (float_info<double>::significand_bits + 2))) >>
+ (64 - float_info<double>::significand_bits - 1 - beta_minus_1);
+ }
+
+ // Right endpoint for the shorter interval case: cache.high() increased by
+ // cache.high() >> (significand_bits + 1), then scaled down to an integer.
+ static carrier_uint compute_right_endpoint_for_shorter_interval_case(
+ const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT {
+ return (cache.high() +
+ (cache.high() >> (float_info<double>::significand_bits + 1))) >>
+ (64 - float_info<double>::significand_bits - 1 - beta_minus_1);
+ }
+
+ // Keeps one extra bit below the integer position, adds 1 and halves, which
+ // rounds the scaled cache value to the nearest integer (halves go up).
+ static carrier_uint compute_round_up_for_shorter_interval_case(
+ const cache_entry_type& cache, int beta_minus_1) FMT_NOEXCEPT {
+ return ((cache.high() >>
+ (64 - float_info<double>::significand_bits - 2 - beta_minus_1)) +
+ 1) /
+ 2;
+ }
+};
+
+// Various integer checks
+// Returns true iff exponent lies in the closed range given by float_info<T>'s
+// case_shorter_interval_left_endpoint lower and upper thresholds.
+template <class T>
+bool is_left_endpoint_integer_shorter_interval(int exponent) FMT_NOEXCEPT {
+ return exponent >=
+ float_info<
+ T>::case_shorter_interval_left_endpoint_lower_threshold &&
+ exponent <=
+ float_info<T>::case_shorter_interval_left_endpoint_upper_threshold;
+}
+// Decides whether an interval endpoint is an integer from the range exponent
+// falls in: never below the fc_pm_half lower threshold, always up to its
+// upper threshold, and beyond that only if two_f is divisible by
+// pow(5, minus_k) (and never past the divisibility-check threshold).
+template <class T>
+bool is_endpoint_integer(typename float_info<T>::carrier_uint two_f,
+ int exponent, int minus_k) FMT_NOEXCEPT {
+ if (exponent < float_info<T>::case_fc_pm_half_lower_threshold) return false;
+ // For k >= 0.
+ if (exponent <= float_info<T>::case_fc_pm_half_upper_threshold) return true;
+ // For k < 0.
+ if (exponent > float_info<T>::divisibility_check_by_5_threshold) return false;
+ return divisible_by_power_of_5(two_f, minus_k);
+}
+
+// Decides whether the center of the interval is an integer. Depending on the
+// range exponent falls in, the answer is fixed or reduces to a divisibility
+// test of two_f by a power of 5 or a power of 2.
+template <class T>
+bool is_center_integer(typename float_info<T>::carrier_uint two_f, int exponent,
+ int minus_k) FMT_NOEXCEPT {
+ // Exponent for 5 is negative.
+ if (exponent > float_info<T>::divisibility_check_by_5_threshold) return false;
+ if (exponent > float_info<T>::case_fc_upper_threshold)
+ return divisible_by_power_of_5(two_f, minus_k);
+ // Both exponents are nonnegative.
+ if (exponent >= float_info<T>::case_fc_lower_threshold) return true;
+ // Exponent for 2 is negative.
+ return divisible_by_power_of_2(two_f, minus_k - exponent + 1);
+}
+
+// Removes trailing decimal zeros from n and returns the number of zeros
+// removed (float).
+FMT_ALWAYS_INLINE int remove_trailing_zeros(uint32_t& n) FMT_NOEXCEPT {
+ // t is the number of trailing zero bits of n, which bounds how many factors
+ // of 10 can divide it; it is capped at max_trailing_zeros.
+#ifdef FMT_BUILTIN_CTZ
+ int t = FMT_BUILTIN_CTZ(n);
+#else
+ int t = ctz(n);
+#endif
+ if (t > float_info<float>::max_trailing_zeros)
+ t = float_info<float>::max_trailing_zeros;
+
+ // Modular inverses of 5 (mod_inv1) and 25 (mod_inv2) modulo 2^32, paired
+ // with the largest quotients for which the wrapped product still indicates
+ // exact divisibility.
+ const uint32_t mod_inv1 = 0xcccccccd;
+ const uint32_t max_quotient1 = 0x33333333;
+ const uint32_t mod_inv2 = 0xc28f5c29;
+ const uint32_t max_quotient2 = 0x0a3d70a3;
+
+ int s = 0;
+ // Strip factors of 5 two at a time while n stays divisible by 25.
+ for (; s < t - 1; s += 2) {
+ if (n * mod_inv2 > max_quotient2) break;
+ n *= mod_inv2;
+ }
+ // Then strip one more factor of 5 if possible.
+ if (s < t && n * mod_inv1 <= max_quotient1) {
+ n *= mod_inv1;
+ ++s;
+ }
+ // Remove the matching s factors of 2.
+ n >>= s;
+ return s;
+}
+
+// Removes trailing zeros and returns the number of zeros removed (double)
+FMT_ALWAYS_INLINE int remove_trailing_zeros(uint64_t& n) FMT_NOEXCEPT {
+ // t is the number of trailing zero bits of n, which bounds how many factors
+ // of 10 can divide it; it is capped at max_trailing_zeros.
+#ifdef FMT_BUILTIN_CTZLL
+ int t = FMT_BUILTIN_CTZLL(n);
+#else
+ int t = ctzll(n);
+#endif
+ if (t > float_info<double>::max_trailing_zeros)
+ t = float_info<double>::max_trailing_zeros;
+ // Divide by 10^8 and reduce to 32-bits
+ // Since ret_value.significand <= (2^64 - 1) / 1000 < 10^17,
+ // both the quotient and the remainder should fit in 32 bits
+
+ // mod_inv1 is the modular inverse of 5 modulo 2^32; n is divisible by 5 iff
+ // the wrapped product n * mod_inv1 does not exceed max_quotient1.
+ // NOTE(review): mod_inv8 / max_quotient8 are presumably the 64-bit
+ // counterparts for pow(5, 8); confirm.
+ const uint32_t mod_inv1 = 0xcccccccd;
+ const uint32_t max_quotient1 = 0x33333333;
+ const uint64_t mod_inv8 = 0xc767074b22e90e21;
+ const uint64_t max_quotient8 = 0x00002af31dc46118;
+
+ // If the number is divisible by 1'0000'0000, work with the quotient
+ if (t >= 8) {
+ auto quotient_candidate = n * mod_inv8;
+
+ if (quotient_candidate <= max_quotient8) {
+ auto quotient = static_cast<uint32_t>(quotient_candidate >> 8);
+
+ // Keep stripping single factors of 5 from the 32-bit quotient; the
+ // matching factors of 2 are removed by the shift after the loop.
+ int s = 8;
+ for (; s < t; ++s) {
+ if (quotient * mod_inv1 > max_quotient1) break;
+ quotient *= mod_inv1;
+ }
+ quotient >>= (s - 8);
+ n = quotient;
+ return s;
}
}
- // Implements Grisu's round_weed.
- digits::result on_digit(char digit, uint64_t divisor, uint64_t remainder,
- uint64_t error, int exp, bool integral) {
- buf[size++] = digit;
- if (remainder >= error) return digits::more;
- uint64_t unit = integral ? 1 : data::powers_of_10_64[-exp];
- uint64_t up = (diff - 1) * unit; // wp_Wup
- round(up, divisor, remainder, error);
- uint64_t down = (diff + 1) * unit; // wp_Wdown
- if (remainder < down && error - remainder >= divisor &&
- (remainder + divisor < down ||
- down - remainder > remainder + divisor - down)) {
- return digits::error;
+ // Otherwise, work with the remainder
+ // n = quotient * 10^8 + remainder. Each step below strips one factor of 5
+ // from the remainder; on exit after s steps the s factors of 2 are shifted
+ // out and the quotient is scaled by 10^(8 - s) to rebuild n / 10^s.
+ auto quotient = static_cast<uint32_t>(n / 100000000);
+ auto remainder = static_cast<uint32_t>(n - 100000000 * quotient);
+
+ if (t == 0 || remainder * mod_inv1 > max_quotient1) {
+ return 0;
+ }
+ remainder *= mod_inv1;
+
+ if (t == 1 || remainder * mod_inv1 > max_quotient1) {
+ n = (remainder >> 1) + quotient * 10000000ull;
+ return 1;
+ }
+ remainder *= mod_inv1;
+
+ if (t == 2 || remainder * mod_inv1 > max_quotient1) {
+ n = (remainder >> 2) + quotient * 1000000ull;
+ return 2;
+ }
+ remainder *= mod_inv1;
+
+ if (t == 3 || remainder * mod_inv1 > max_quotient1) {
+ n = (remainder >> 3) + quotient * 100000ull;
+ return 3;
+ }
+ remainder *= mod_inv1;
+
+ if (t == 4 || remainder * mod_inv1 > max_quotient1) {
+ n = (remainder >> 4) + quotient * 10000ull;
+ return 4;
+ }
+ remainder *= mod_inv1;
+
+ if (t == 5 || remainder * mod_inv1 > max_quotient1) {
+ n = (remainder >> 5) + quotient * 1000ull;
+ return 5;
+ }
+ remainder *= mod_inv1;
+
+ if (t == 6 || remainder * mod_inv1 > max_quotient1) {
+ n = (remainder >> 6) + quotient * 100ull;
+ return 6;
+ }
+ remainder *= mod_inv1;
+
+ n = (remainder >> 7) + quotient * 10ull;
+ return 7;
+}
+
+// The main algorithm for shorter interval case
+// to_decimal calls this for normal values whose stored significand bits are
+// all zero; exponent is the binary exponent it computed. Returns the decimal
+// significand and exponent.
+template <class T>
+FMT_ALWAYS_INLINE FMT_SAFEBUFFERS decimal_fp<T> shorter_interval_case(
+ int exponent) FMT_NOEXCEPT {
+ decimal_fp<T> ret_value;
+ // Compute k and beta
+ const int minus_k = floor_log10_pow2_minus_log10_4_over_3(exponent);
+ const int beta_minus_1 = exponent + floor_log2_pow10(-minus_k);
+
+ // Compute xi and zi
+ using cache_entry_type = typename cache_accessor<T>::cache_entry_type;
+ const cache_entry_type cache = cache_accessor<T>::get_cached_power(-minus_k);
+
+ auto xi = cache_accessor<T>::compute_left_endpoint_for_shorter_interval_case(
+ cache, beta_minus_1);
+ auto zi = cache_accessor<T>::compute_right_endpoint_for_shorter_interval_case(
+ cache, beta_minus_1);
+
+ // If the left endpoint is not an integer, increase it
+ if (!is_left_endpoint_integer_shorter_interval<T>(exponent)) ++xi;
+
+ // Try bigger divisor
+ ret_value.significand = zi / 10;
+
+ // If that succeeds, remove trailing zeros if necessary and return
+ if (ret_value.significand * 10 >= xi) {
+ ret_value.exponent = minus_k + 1;
+ ret_value.exponent += remove_trailing_zeros(ret_value.significand);
+ return ret_value;
+ }
+
+ // Otherwise, compute the round-up of y
+ ret_value.significand =
+ cache_accessor<T>::compute_round_up_for_shorter_interval_case(
+ cache, beta_minus_1);
+ ret_value.exponent = minus_k;
+
+ // When a tie occurs, choose one of them according to the rule
+ // (here: clear the lowest bit so the significand becomes even).
+ if (exponent >= float_info<T>::shorter_interval_tie_lower_threshold &&
+ exponent <= float_info<T>::shorter_interval_tie_upper_threshold) {
+ ret_value.significand = ret_value.significand % 2 == 0
+ ? ret_value.significand
+ : ret_value.significand - 1;
+ } else if (ret_value.significand < xi) {
+ // The rounded value fell below the left endpoint; bump it back inside.
+ ++ret_value.significand;
+ }
+ return ret_value;
+}
+
+// Dragonbox entry point: converts x to a decimal significand and exponent.
+// Returns {0, 0} when both the exponent and significand fields of x are zero.
+// Normal values with an all-zero stored significand are handed to
+// shorter_interval_case; everything else goes through steps 1-3 below.
+// NOTE(review): only the significand and exponent fields are extracted here;
+// the sign is presumably handled by the caller - confirm.
+template <typename T>
+FMT_SAFEBUFFERS decimal_fp<T> to_decimal(T x) FMT_NOEXCEPT {
+ // Step 1: integer promotion & Schubfach multiplier calculation.
+
+ using carrier_uint = typename float_info<T>::carrier_uint;
+ using cache_entry_type = typename cache_accessor<T>::cache_entry_type;
+ auto br = bit_cast<carrier_uint>(x);
+
+ // Extract significand bits and exponent bits.
+ const carrier_uint significand_mask =
+ (static_cast<carrier_uint>(1) << float_info<T>::significand_bits) - 1;
+ carrier_uint significand = (br & significand_mask);
+ int exponent = static_cast<int>((br & exponent_mask<T>()) >>
+ float_info<T>::significand_bits);
+
+ if (exponent != 0) { // Check if normal.
+ exponent += float_info<T>::exponent_bias - float_info<T>::significand_bits;
+
+ // Shorter interval case; proceed like Schubfach.
+ if (significand == 0) return shorter_interval_case<T>(exponent);
+
+ // Restore the implicit leading bit of a normal value.
+ significand |=
+ (static_cast<carrier_uint>(1) << float_info<T>::significand_bits);
+ } else {
+ // Subnormal case; the interval is always regular.
+ if (significand == 0) return {0, 0};
+ exponent = float_info<T>::min_exponent - float_info<T>::significand_bits;
+ }
+
+ // Both endpoints are included exactly when the significand is even.
+ const bool include_left_endpoint = (significand % 2 == 0);
+ const bool include_right_endpoint = include_left_endpoint;
+
+ // Compute k and beta.
+ const int minus_k = floor_log10_pow2(exponent) - float_info<T>::kappa;
+ const cache_entry_type cache = cache_accessor<T>::get_cached_power(-minus_k);
+ const int beta_minus_1 = exponent + floor_log2_pow10(-minus_k);
+
+ // Compute zi and deltai
+ // 10^kappa <= deltai < 10^(kappa + 1)
+ const uint32_t deltai = cache_accessor<T>::compute_delta(cache, beta_minus_1);
+ const carrier_uint two_fc = significand << 1;
+ const carrier_uint two_fr = two_fc | 1;
+ const carrier_uint zi =
+ cache_accessor<T>::compute_mul(two_fr << beta_minus_1, cache);
+
+ // Step 2: Try larger divisor; remove trailing zeros if necessary
+
+ // Using an upper bound on zi, we might be able to optimize the division
+ // better than the compiler; we are computing zi / big_divisor here
+ decimal_fp<T> ret_value;
+ ret_value.significand = divide_by_10_to_kappa_plus_1(zi);
+ uint32_t r = static_cast<uint32_t>(zi - float_info<T>::big_divisor *
+ ret_value.significand);
+
+ if (r > deltai) {
+ goto small_divisor_case_label;
+ } else if (r < deltai) {
+ // Exclude the right endpoint if necessary
+ if (r == 0 && !include_right_endpoint &&
+ is_endpoint_integer<T>(two_fr, exponent, minus_k)) {
+ --ret_value.significand;
+ r = float_info<T>::big_divisor;
+ goto small_divisor_case_label;
+ }
+ } else {
+ // r == deltai; compare fractional parts
+ // Check conditions in the order different from the paper
+ // to take advantage of short-circuiting
+ const carrier_uint two_fl = two_fc - 1;
+ if ((!include_left_endpoint ||
+ !is_endpoint_integer<T>(two_fl, exponent, minus_k)) &&
+ !cache_accessor<T>::compute_mul_parity(two_fl, cache, beta_minus_1)) {
+ goto small_divisor_case_label;
}
- return 2 * unit <= remainder && remainder <= error - 4 * unit
- ? digits::done
- : digits::error;
}
-};
+ ret_value.exponent = minus_k + float_info<T>::kappa + 1;
+
+ // We may need to remove trailing zeros
+ ret_value.exponent += remove_trailing_zeros(ret_value.significand);
+ return ret_value;
+
+ // Step 3: Find the significand with the smaller divisor
+
+small_divisor_case_label:
+ ret_value.significand *= 10;
+ ret_value.exponent = minus_k + float_info<T>::kappa;
+
+ const uint32_t mask = (1u << float_info<T>::kappa) - 1;
+ auto dist = r - (deltai / 2) + (float_info<T>::small_divisor / 2);
+
+ // Is dist divisible by 2^kappa?
+ if ((dist & mask) == 0) {
+ const bool approx_y_parity =
+ ((dist ^ (float_info<T>::small_divisor / 2)) & 1) != 0;
+ dist >>= float_info<T>::kappa;
+
+ // Is dist divisible by 5^kappa?
+ // The call also replaces dist by dist / 5^kappa, whatever it returns.
+ if (check_divisibility_and_divide_by_pow5<float_info<T>::kappa>(dist)) {
+ ret_value.significand += dist;
+
+ // Check z^(f) >= epsilon^(f)
+ // We have either yi == zi - epsiloni or yi == (zi - epsiloni) - 1,
+ // where yi == zi - epsiloni if and only if z^(f) >= epsilon^(f)
+ // Since there are only 2 possibilities, we only need to care about the
+ // parity. Also, zi and r should have the same parity since the divisor
+ // is an even number
+ if (cache_accessor<T>::compute_mul_parity(two_fc, cache, beta_minus_1) !=
+ approx_y_parity) {
+ --ret_value.significand;
+ } else {
+ // If z^(f) >= epsilon^(f), we might have a tie
+ // when z^(f) == epsilon^(f), or equivalently, when y is an integer
+ // On a tie the significand is made even by clearing its lowest bit.
+ if (is_center_integer<T>(two_fc, exponent, minus_k)) {
+ ret_value.significand = ret_value.significand % 2 == 0
+ ? ret_value.significand
+ : ret_value.significand - 1;
+ }
+ }
+ }
+ // Is dist not divisible by 5^kappa?
+ else {
+ ret_value.significand += dist;
+ }
+ }
+ // Is dist not divisible by 2^kappa?
+ else {
+ // Since we know dist is small, we might be able to optimize the division
+ // better than the compiler; we are computing dist / small_divisor here
+ ret_value.significand +=
+ small_division_by_pow10<float_info<T>::kappa>(dist);
+ }
+ return ret_value;
+}
+} // namespace dragonbox
// Formats value using a variation of the Fixed-Precision Positive
// Floating-Point Printout ((FPP)^2) algorithm by Steele & White:
// https://fmt.dev/p372-steele.pdf.
template <typename Double>
-void fallback_format(Double d, buffer<char>& buf, int& exp10) {
+void fallback_format(Double d, int num_digits, bool binary32, buffer<char>& buf,
+ int& exp10) {
bigint numerator; // 2 * R in (FPP)^2.
bigint denominator; // 2 * S in (FPP)^2.
// lower and upper are differences between value and corresponding boundaries.
// Shift numerator and denominator by an extra bit or two (if lower boundary
// is closer) to make lower and upper integers. This eliminates multiplication
// by 2 during later computations.
- // TODO: handle float
- int shift = value.assign(d) ? 2 : 1;
+ const bool is_predecessor_closer =
+ binary32 ? value.assign(static_cast<float>(d)) : value.assign(d);
+ int shift = is_predecessor_closer ? 2 : 1;
uint64_t significand = value.f << shift;
if (value.e >= 0) {
numerator.assign(significand);
upper = &upper_store;
}
denominator.assign_pow10(exp10);
- denominator <<= 1;
+ denominator <<= shift;
} else if (exp10 < 0) {
numerator.assign_pow10(-exp10);
lower.assign(numerator);
upper = &upper_store;
}
}
- if (!upper) upper = &lower;
// Invariant: value == (numerator / denominator) * pow(10, exp10).
- bool even = (value.f & 1) == 0;
- int num_digits = 0;
- char* data = buf.data();
- for (;;) {
- int digit = numerator.divmod_assign(denominator);
- bool low = compare(numerator, lower) - even < 0; // numerator <[=] lower.
- // numerator + upper >[=] pow10:
- bool high = add_compare(numerator, *upper, denominator) + even > 0;
- data[num_digits++] = static_cast<char>('0' + digit);
- if (low || high) {
- if (!low) {
- ++data[num_digits - 1];
- } else if (high) {
- int result = add_compare(numerator, numerator, denominator);
- // Round half to even.
- if (result > 0 || (result == 0 && (digit % 2) != 0))
+ if (num_digits < 0) {
+ // Generate the shortest representation.
+ if (!upper) upper = &lower;
+ bool even = (value.f & 1) == 0;
+ num_digits = 0;
+ char* data = buf.data();
+ for (;;) {
+ int digit = numerator.divmod_assign(denominator);
+ bool low = compare(numerator, lower) - even < 0; // numerator <[=] lower.
+ // numerator + upper >[=] pow10:
+ bool high = add_compare(numerator, *upper, denominator) + even > 0;
+ data[num_digits++] = static_cast<char>('0' + digit);
+ if (low || high) {
+ if (!low) {
++data[num_digits - 1];
+ } else if (high) {
+ int result = add_compare(numerator, numerator, denominator);
+ // Round half to even.
+ if (result > 0 || (result == 0 && (digit % 2) != 0))
+ ++data[num_digits - 1];
+ }
+ buf.try_resize(to_unsigned(num_digits));
+ exp10 -= num_digits - 1;
+ return;
}
- buf.resize(to_unsigned(num_digits));
- exp10 -= num_digits - 1;
- return;
+ numerator *= 10;
+ lower *= 10;
+ if (upper != &lower) *upper *= 10;
}
+ }
+ // Generate the given number of digits.
+ exp10 -= num_digits - 1;
+ if (num_digits == 0) {
+ buf.try_resize(1);
+ denominator *= 10;
+ buf[0] = add_compare(numerator, numerator, denominator) > 0 ? '1' : '0';
+ return;
+ }
+ buf.try_resize(to_unsigned(num_digits));
+ for (int i = 0; i < num_digits - 1; ++i) {
+ int digit = numerator.divmod_assign(denominator);
+ buf[i] = static_cast<char>('0' + digit);
numerator *= 10;
- lower *= 10;
- if (upper != &lower) *upper *= 10;
}
+ int digit = numerator.divmod_assign(denominator);
+ auto result = add_compare(numerator, numerator, denominator);
+ if (result > 0 || (result == 0 && (digit % 2) != 0)) {
+ if (digit == 9) {
+ const auto overflow = '0' + 10;
+ buf[num_digits - 1] = overflow;
+ // Propagate the carry.
+ for (int i = num_digits - 1; i > 0 && buf[i] == overflow; --i) {
+ buf[i] = '0';
+ ++buf[i - 1];
+ }
+ if (buf[0] == overflow) {
+ buf[0] = '1';
+ ++exp10;
+ }
+ return;
+ }
+ ++digit;
+ }
+ buf[num_digits - 1] = static_cast<char>('0' + digit);
}
-// Formats value using the Grisu algorithm
-// (https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf)
-// if T is a IEEE754 binary32 or binary64 and snprintf otherwise.
template <typename T>
int format_float(T value, int precision, float_specs specs, buffer<char>& buf) {
static_assert(!std::is_same<T, float>::value, "");
buf.push_back('0');
return 0;
}
- buf.resize(to_unsigned(precision));
+ buf.try_resize(to_unsigned(precision));
std::uninitialized_fill_n(buf.data(), precision, '0');
return -precision;
}
if (!specs.use_grisu) return snprintf_float(value, precision, specs, buf);
+ if (precision < 0) {
+ // Use Dragonbox for the shortest format.
+ if (specs.binary32) {
+ auto dec = dragonbox::to_decimal(static_cast<float>(value));
+ write<char>(buffer_appender<char>(buf), dec.significand);
+ return dec.exponent;
+ }
+ auto dec = dragonbox::to_decimal(static_cast<double>(value));
+ write<char>(buffer_appender<char>(buf), dec.significand);
+ return dec.exponent;
+ }
+
+ // Use Grisu + Dragon4 for the given precision:
+ // https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf.
int exp = 0;
const int min_exp = -60; // alpha in Grisu.
int cached_exp10 = 0; // K in Grisu.
- if (precision < 0) {
- fp fp_value;
- auto boundaries = specs.binary32
- ? fp_value.assign_float_with_boundaries(value)
- : fp_value.assign_with_boundaries(value);
- fp_value = normalize(fp_value);
- // Find a cached power of 10 such that multiplying value by it will bring
- // the exponent in the range [min_exp, -32].
- const fp cached_pow = get_cached_power(
- min_exp - (fp_value.e + fp::significand_size), cached_exp10);
- // Multiply value and boundaries by the cached power of 10.
- fp_value = fp_value * cached_pow;
- boundaries.lower = multiply(boundaries.lower, cached_pow.f);
- boundaries.upper = multiply(boundaries.upper, cached_pow.f);
- assert(min_exp <= fp_value.e && fp_value.e <= -32);
- --boundaries.lower; // \tilde{M}^- - 1 ulp -> M^-_{\downarrow}.
- ++boundaries.upper; // \tilde{M}^+ + 1 ulp -> M^+_{\uparrow}.
- // Numbers outside of (lower, upper) definitely do not round to value.
- grisu_shortest_handler handler{buf.data(), 0,
- boundaries.upper - fp_value.f};
- auto result =
- grisu_gen_digits(fp(boundaries.upper, fp_value.e),
- boundaries.upper - boundaries.lower, exp, handler);
- if (result == digits::error) {
- exp += handler.size - cached_exp10 - 1;
- fallback_format(value, buf, exp);
- return exp;
- }
- buf.resize(to_unsigned(handler.size));
+ fp normalized = normalize(fp(value));
+ const auto cached_pow = get_cached_power(
+ min_exp - (normalized.e + fp::significand_size), cached_exp10);
+ normalized = normalized * cached_pow;
+ // Limit precision to the maximum possible number of significant digits in an
+ // IEEE754 double because we don't need to generate zeros.
+ const int max_double_digits = 767;
+ if (precision > max_double_digits) precision = max_double_digits;
+ fixed_handler handler{buf.data(), 0, precision, -cached_exp10, fixed};
+ if (grisu_gen_digits(normalized, 1, exp, handler) == digits::error) {
+ exp += handler.size - cached_exp10 - 1;
+ fallback_format(value, handler.precision, specs.binary32, buf, exp);
} else {
- if (precision > 17) return snprintf_float(value, precision, specs, buf);
- fp normalized = normalize(fp(value));
- const auto cached_pow = get_cached_power(
- min_exp - (normalized.e + fp::significand_size), cached_exp10);
- normalized = normalized * cached_pow;
- fixed_handler handler{buf.data(), 0, precision, -cached_exp10, fixed};
- if (grisu_gen_digits(normalized, 1, exp, handler) == digits::error)
- return snprintf_float(value, precision, specs, buf);
- int num_digits = handler.size;
- if (!fixed) {
- // Remove trailing zeros.
- while (num_digits > 0 && buf[num_digits - 1] == '0') {
- --num_digits;
- ++exp;
- }
+ exp += handler.exp10;
+ buf.try_resize(to_unsigned(handler.size));
+ }
+ if (!fixed && !specs.showpoint) {
+ // Remove trailing zeros.
+ auto num_digits = buf.size();
+ while (num_digits > 0 && buf[num_digits - 1] == '0') {
+ --num_digits;
+ ++exp;
}
- buf.resize(to_unsigned(num_digits));
+ buf.try_resize(num_digits);
}
- return exp - cached_exp10;
-}
+ return exp;
+}
template <typename T>
int snprintf_float(T value, int precision, float_specs specs,
? snprintf_ptr(begin, capacity, format, precision, value)
: snprintf_ptr(begin, capacity, format, value);
if (result < 0) {
- buf.reserve(buf.capacity() + 1); // The buffer will grow exponentially.
+ // The buffer will grow exponentially.
+ buf.try_reserve(buf.capacity() + 1);
continue;
}
auto size = to_unsigned(result);
// Size equal to capacity means that the last character was truncated.
if (size >= capacity) {
- buf.reserve(size + offset + 1); // Add 1 for the terminating '\0'.
+ buf.try_reserve(size + offset + 1); // Add 1 for the terminating '\0'.
continue;
}
auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
if (specs.format == float_format::fixed) {
if (precision == 0) {
- buf.resize(size);
+ buf.try_resize(size);
return 0;
}
// Find and remove the decimal point.
} while (is_digit(*p));
int fraction_size = static_cast<int>(end - p - 1);
std::memmove(p, p + 1, to_unsigned(fraction_size));
- buf.resize(size - 1);
+ buf.try_resize(size - 1);
return -fraction_size;
}
if (specs.format == float_format::hex) {
- buf.resize(size + offset);
+ buf.try_resize(size + offset);
return 0;
}
// Find and parse the exponent.
fraction_size = static_cast<int>(fraction_end - begin - 1);
std::memmove(begin + 1, begin + 2, to_unsigned(fraction_size));
}
- buf.resize(to_unsigned(fraction_size) + offset + 1);
+ buf.try_resize(to_unsigned(fraction_size) + offset + 1);
return exp - fraction_size;
}
}
* occurs, this pointer will be a guess that depends on the particular
* error, but it will always advance at least one byte.
*/
-FMT_FUNC const char* utf8_decode(const char* buf, uint32_t* c, int* e) {
- static const char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
- 0, 0, 2, 2, 2, 2, 3, 3, 4, 0};
+inline const char* utf8_decode(const char* buf, uint32_t* c, int* e) {
static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07};
static const uint32_t mins[] = {4194304, 0, 128, 2048, 65536};
static const int shiftc[] = {0, 18, 12, 6, 0};
static const int shifte[] = {0, 6, 4, 2, 0};
- auto s = reinterpret_cast<const unsigned char*>(buf);
- int len = lengths[s[0] >> 3];
-
- // Compute the pointer to the next character early so that the next
- // iteration can start working on the next character. Neither Clang
- // nor GCC figure out this reordering on their own.
- const char* next = buf + len + !len;
+ int len = code_point_length(buf);
+ const char* next = buf + len;
// Assume a four-byte character and load four bytes. Unused bits are
// shifted out.
+ auto s = reinterpret_cast<const unsigned char*>(buf);
*c = uint32_t(s[0] & masks[len]) << 18;
*c |= uint32_t(s[1] & 0x3f) << 12;
*c |= uint32_t(s[2] & 0x3f) << 6;
return next;
}
+
+struct stringifier {  // Function object that turns a value into a std::string.
+ template <typename T> FMT_INLINE std::string operator()(T value) const {
+ return to_string(value);
+ }
+ std::string operator()(basic_format_arg<format_context>::handle h) const {  // Handle overload: lets h format itself into a temporary buffer.
+ memory_buffer buf;
+ format_parse_context parse_ctx({});  // Built from {} - presumably an empty format string; confirm against format_parse_context.
+ format_context format_ctx(buffer_appender<char>(buf), {}, {});  // Output is appended to buf.
+ h.format(parse_ctx, format_ctx);
+ return to_string(buf);
+ }
+};
} // namespace detail
template <> struct formatter<detail::bigint> {
int result =
detail::safe_strerror(error_code, system_message, buf.size());
if (result == 0) {
- format_to(std::back_inserter(out), "{}: {}", message, system_message);
+ format_to(detail::buffer_appender<char>(out), "{}: {}", message,
+ system_message);
return;
}
if (result != ERANGE)
report_error(format_system_error, error_code, message);
}
-struct stringifier {
- template <typename T> FMT_INLINE std::string operator()(T value) const {
- return to_string(value);
- }
- std::string operator()(basic_format_arg<format_context>::handle h) const {
- memory_buffer buf;
- detail::buffer<char>& base = buf;
- format_parse_context parse_ctx({});
- format_context format_ctx(std::back_inserter(base), {}, {});
- h.format(parse_ctx, format_ctx);
- return to_string(buf);
- }
-};
-
FMT_FUNC std::string detail::vformat(string_view format_str, format_args args) {
if (format_str.size() == 2 && equal2(format_str.data(), "{}")) {
auto arg = args.get(0);
return to_string(buffer);
}
+#ifdef _WIN32
+namespace detail {
+using dword = conditional_t<sizeof(long) == 4, unsigned long, unsigned>;
+extern "C" __declspec(dllimport) int __stdcall WriteConsoleW( //
+ void*, const void*, dword, dword*, void*);
+} // namespace detail
+#endif
+
FMT_FUNC void vprint(std::FILE* f, string_view format_str, format_args args) {
memory_buffer buffer;
detail::vformat_to(buffer, format_str,
auto fd = _fileno(f);
if (_isatty(fd)) {
detail::utf8_to_utf16 u16(string_view(buffer.data(), buffer.size()));
- auto written = DWORD();
- if (!WriteConsoleW(reinterpret_cast<HANDLE>(_get_osfhandle(fd)),
- u16.c_str(), static_cast<DWORD>(u16.size()), &written,
- nullptr)) {
+ auto written = detail::dword();
+ if (!detail::WriteConsoleW(reinterpret_cast<void*>(_get_osfhandle(fd)),
+ u16.c_str(), static_cast<uint32_t>(u16.size()),
+ &written, nullptr)) {
FMT_THROW(format_error("failed to write to console"));
}
return;
FMT_END_NAMESPACE
-#ifdef _MSC_VER
-# pragma warning(pop)
-#endif
-
#endif // FMT_FORMAT_INL_H_
#endif
#if __cplusplus == 201103L || __cplusplus == 201402L
-# if defined(__clang__)
+# if defined(__INTEL_COMPILER) || defined(__PGI)
+# define FMT_FALLTHROUGH
+# elif defined(__clang__)
# define FMT_FALLTHROUGH [[clang::fallthrough]]
-# elif FMT_GCC_VERSION >= 700 && !defined(__PGI) && \
+# elif FMT_GCC_VERSION >= 700 && \
(!defined(__EDG_VERSION__) || __EDG_VERSION__ >= 520)
# define FMT_FALLTHROUGH [[gnu::fallthrough]]
# else
#endif
#ifndef FMT_USE_UDL_TEMPLATE
-// EDG frontend based compilers (icc, nvcc, etc) and GCC < 6.4 do not properly
-// support UDL templates and GCC >= 9 warns about them.
+// EDG frontend based compilers (icc, nvcc, PGI, etc) and GCC < 6.4 do not
+// properly support UDL templates and GCC >= 9 warns about them.
# if FMT_USE_USER_DEFINED_LITERALS && \
(!defined(__EDG_VERSION__) || __EDG_VERSION__ >= 501) && \
((FMT_GCC_VERSION >= 604 && __cplusplus >= 201402L) || \
- FMT_CLANG_VERSION >= 304)
+ FMT_CLANG_VERSION >= 304) && \
+ !defined(__PGI) && !defined(__NVCC__)
# define FMT_USE_UDL_TEMPLATE 1
# else
# define FMT_USE_UDL_TEMPLATE 0
# define FMT_USE_LONG_DOUBLE 1
#endif
+// Defining FMT_REDUCE_INT_INSTANTIATIONS to 1 reduces the number of
+// int_writer template instances to just one by only using the largest integer
+// type. This results in a reduction in binary size but will cause a decrease in
+// integer formatting performance.
+#if !defined(FMT_REDUCE_INT_INSTANTIATIONS)
+# define FMT_REDUCE_INT_INSTANTIATIONS 0
+#endif
+
// __builtin_clz is broken in clang with Microsoft CodeGen:
// https://github.com/fmtlib/fmt/issues/519
#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clz)) && !FMT_MSC_VER
#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clzll)) && !FMT_MSC_VER
# define FMT_BUILTIN_CLZLL(n) __builtin_clzll(n)
#endif
+#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_ctz))
+# define FMT_BUILTIN_CTZ(n) __builtin_ctz(n)
+#endif
+#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_ctzll))
+# define FMT_BUILTIN_CTZLL(n) __builtin_ctzll(n)
+#endif
+
+#if FMT_MSC_VER
+# include <intrin.h> // _BitScanReverse[64], _BitScanForward[64], _umul128
+#endif
// Some compilers masquerade as both MSVC and GCC-likes or otherwise support
// __builtin_clz and __builtin_clzll, so only define FMT_BUILTIN_CLZ using the
// MSVC intrinsics if the clz and clzll builtins are not available.
-#if FMT_MSC_VER && !defined(FMT_BUILTIN_CLZLL) && !defined(_MANAGED)
-# include <intrin.h> // _BitScanReverse, _BitScanReverse64
-
+#if FMT_MSC_VER && !defined(FMT_BUILTIN_CLZLL) && \
+ !defined(FMT_BUILTIN_CTZLL) && !defined(_MANAGED)
FMT_BEGIN_NAMESPACE
namespace detail {
// Avoid Clang with Microsoft CodeGen's -Wunknown-pragmas warning.
# ifndef __clang__
+# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanReverse)
# endif
-inline uint32_t clz(uint32_t x) {
+# if defined(_WIN64) && !defined(__clang__)
+# pragma intrinsic(_BitScanForward64)
+# pragma intrinsic(_BitScanReverse64)
+# endif
+
+inline int clz(uint32_t x) {  // MSVC replacement for __builtin_clz; x must be nonzero (asserted below).
unsigned long r = 0;
_BitScanReverse(&r, x);
-
FMT_ASSERT(x != 0, "");
// Static analysis complains about using uninitialized data
// "r", but the only way that can happen is if "x" is 0,
// which the callers guarantee to not happen.
FMT_SUPPRESS_MSC_WARNING(6102)
- return 31 - r;
+ return 31 ^ static_cast<int>(r);  // For r in [0, 31], 31 ^ r equals 31 - r.
}
# define FMT_BUILTIN_CLZ(n) detail::clz(n)
-# if defined(_WIN64) && !defined(__clang__)
-# pragma intrinsic(_BitScanReverse64)
-# endif
-
-inline uint32_t clzll(uint64_t x) {
+inline int clzll(uint64_t x) {  // MSVC replacement for __builtin_clzll; x must be nonzero (asserted below).
unsigned long r = 0;
# ifdef _WIN64
_BitScanReverse64(&r, x);
# else
// Scan the high 32 bits.
- if (_BitScanReverse(&r, static_cast<uint32_t>(x >> 32))) return 63 - (r + 32);
-
+ // Cast before the XOR so the int return needs no implicit unsigned long -> int
+ // conversion, matching the other returns; for r + 32 in [32, 63] the value is
+ // the same as 63 - (r + 32).
+ if (_BitScanReverse(&r, static_cast<uint32_t>(x >> 32)))
+ return 63 ^ static_cast<int>(r + 32);
// Scan the low 32 bits.
_BitScanReverse(&r, static_cast<uint32_t>(x));
# endif
-
FMT_ASSERT(x != 0, "");
- // Static analysis complains about using uninitialized data
- // "r", but the only way that can happen is if "x" is 0,
- // which the callers guarantee to not happen.
- FMT_SUPPRESS_MSC_WARNING(6102)
- return 63 - r;
+ FMT_SUPPRESS_MSC_WARNING(6102) // Suppress a bogus static analysis warning.
+ return 63 ^ static_cast<int>(r);
}
# define FMT_BUILTIN_CLZLL(n) detail::clzll(n)
+
+inline int ctz(uint32_t x) {  // MSVC replacement for __builtin_ctz; x must be nonzero (asserted below).
+ unsigned long r = 0;
+ _BitScanForward(&r, x);  // Stores the bit index it finds in r.
+ FMT_ASSERT(x != 0, "");
+ FMT_SUPPRESS_MSC_WARNING(6102) // Suppress a bogus static analysis warning.
+ return static_cast<int>(r);
+}
+# define FMT_BUILTIN_CTZ(n) detail::ctz(n)
+
+inline int ctzll(uint64_t x) {  // MSVC replacement for __builtin_ctzll; x must be nonzero (asserted below).
+ unsigned long r = 0;
+ FMT_ASSERT(x != 0, "");
+ FMT_SUPPRESS_MSC_WARNING(6102) // Suppress a bogus static analysis warning.
+# ifdef _WIN64
+ _BitScanForward64(&r, x);
+# else
+ // Scan the low 32 bits.
+ if (_BitScanForward(&r, static_cast<uint32_t>(x))) return static_cast<int>(r);
+ // Scan the high 32 bits.
+ _BitScanForward(&r, static_cast<uint32_t>(x >> 32));
+ r += 32;  // The bit was found in the high half, so offset its index by 32.
+# endif
+ return static_cast<int>(r);
+}
+# define FMT_BUILTIN_CTZLL(n) detail::ctzll(n)
} // namespace detail
FMT_END_NAMESPACE
#endif
#endif
}
-// A workaround for gcc 4.8 to make void_t work in a SFINAE context.
-template <typename... Ts> struct void_t_impl { using type = void; };
-
-template <typename... Ts>
-using void_t = typename detail::void_t_impl<Ts...>::type;
-
// An approximation of iterator_t for pre-C++20 systems.
template <typename T>
using iterator_t = decltype(std::begin(std::declval<T&>()));
template <typename T> using sentinel_t = decltype(std::end(std::declval<T&>()));
-// Detect the iterator category of *any* given type in a SFINAE-friendly way.
-// Unfortunately, older implementations of std::iterator_traits are not safe
-// for use in a SFINAE-context.
-template <typename It, typename Enable = void>
-struct iterator_category : std::false_type {};
-
-template <typename T> struct iterator_category<T*> {
- using type = std::random_access_iterator_tag;
-};
-
-template <typename It>
-struct iterator_category<It, void_t<typename It::iterator_category>> {
- using type = typename It::iterator_category;
-};
-
-// Detect if *any* given type models the OutputIterator concept.
-template <typename It> class is_output_iterator {
- // Check for mutability because all iterator categories derived from
- // std::input_iterator_tag *may* also meet the requirements of an
- // OutputIterator, thereby falling into the category of 'mutable iterators'
- // [iterator.requirements.general] clause 4. The compiler reveals this
- // property only at the point of *actually dereferencing* the iterator!
- template <typename U>
- static decltype(*(std::declval<U>())) test(std::input_iterator_tag);
- template <typename U> static char& test(std::output_iterator_tag);
- template <typename U> static const char& test(...);
-
- using type = decltype(test<It>(typename iterator_category<It>::type{}));
-
- public:
- enum { value = !std::is_const<remove_reference_t<type>>::value };
-};
-
// A workaround for std::string not having mutable data() until C++17.
template <typename Char> inline Char* get_data(std::basic_string<Char>& s) {
return &s[0];
return make_checked(get_data(c) + size, n);
}
+template <typename T>
+inline buffer_appender<T> reserve(buffer_appender<T> it, size_t n) {  // Requests room for n more elements in the buffer behind it.
+ buffer<T>& buf = get_container(it);
+ buf.try_reserve(buf.size() + n);  // Only a request: the resulting capacity is not checked here.
+ return it;  // The iterator itself is returned unchanged.
+}
+
template <typename Iterator> inline Iterator& reserve(Iterator& it, size_t) {
return it;
}
+template <typename T, typename OutputIt>
+constexpr T* to_pointer(OutputIt, size_t) {  // Fallback overload for arbitrary output iterators.
+ return nullptr;  // Null tells the caller that no direct pointer is available.
+}
+template <typename T> T* to_pointer(buffer_appender<T> it, size_t n) {  // Returns a pointer to n newly appended elements, or nullptr if they do not fit.
+ buffer<T>& buf = get_container(it);
+ auto size = buf.size();
+ if (buf.capacity() < size + n) return nullptr;  // Never grows the buffer; gives up if the current capacity is too small.
+ buf.try_resize(size + n);
+ return buf.data() + size;  // Points at the old end, i.e. the start of the n new elements.
+}
+
template <typename Container, FMT_ENABLE_IF(is_contiguous<Container>::value)>
inline std::back_insert_iterator<Container> base_iterator(
std::back_insert_iterator<Container>& it,
++count_;
return *this;
}
-
counting_iterator operator++(int) {
auto it = *this;
++*this;
return it;
}
+ friend counting_iterator operator+(counting_iterator it, difference_type n) {
+ it.count_ += static_cast<size_t>(n);
+ return it;
+ }
+
value_type operator*() const { return {}; }
};
[](char c) { return static_cast<char8_type>(c); });
}
-#ifndef FMT_USE_GRISU
-# define FMT_USE_GRISU 1
-#endif
-
-template <typename T> constexpr bool use_grisu() {
- return FMT_USE_GRISU && std::numeric_limits<double>::is_iec559 &&
- sizeof(T) <= sizeof(double);
+template <typename Char, typename InputIt>
+inline counting_iterator copy_str(InputIt begin, InputIt end,
+ counting_iterator it) {
+ return it + (end - begin);  // Nothing is copied: the counting iterator is just advanced by the range length.
+}
+template <typename T>
+using is_fast_float = bool_constant<std::numeric_limits<T>::is_iec559 &&
+ sizeof(T) <= sizeof(double)>;
+
+#ifndef FMT_USE_FULL_CACHE_DRAGONBOX
+# define FMT_USE_FULL_CACHE_DRAGONBOX 0
+#endif
+
template <typename T>
template <typename U>
void buffer<T>::append(const U* begin, const U* end) {
- size_t new_size = size_ + to_unsigned(end - begin);
- reserve(new_size);
- std::uninitialized_copy(begin, end,
- make_checked(ptr_ + size_, capacity_ - size_));
- size_ = new_size;
+ do {
+ auto count = to_unsigned(end - begin);
+ try_reserve(size_ + count);
+ auto free_cap = capacity_ - size_;
+ if (free_cap < count) count = free_cap;
+ std::uninitialized_copy_n(begin, count, make_checked(ptr_ + size_, count));
+ size_ += count;
+ begin += count;
+ } while (begin != end);
+}
+
+template <typename OutputIt, typename T, typename Traits>
+void iterator_buffer<OutputIt, T, Traits>::flush() {  // Writes the buffered elements to out_ and empties the buffer.
+ out_ = std::copy_n(data_, this->limit(this->size()), out_);  // limit() decides how many of the size() elements are copied.
+ this->clear();
+}
} // namespace detail
*/
template <typename T, size_t SIZE = inline_buffer_size,
typename Allocator = std::allocator<T>>
-class basic_memory_buffer : public detail::buffer<T> {
+class basic_memory_buffer final : public detail::buffer<T> {
private:
T store_[SIZE];
}
protected:
- void grow(size_t size) FMT_OVERRIDE;
+ void grow(size_t size) final FMT_OVERRIDE;
public:
using value_type = T;
: alloc_(alloc) {
this->set(store_, SIZE);
}
- ~basic_memory_buffer() FMT_OVERRIDE { deallocate(); }
+ ~basic_memory_buffer() { deallocate(); }
private:
// Move data from other to this buffer.
// Returns a copy of the allocator associated with this buffer.
Allocator get_allocator() const { return alloc_; }
+
+ /**
+ Resizes the buffer to contain *count* elements. If T is a POD type new
+ elements may not be initialized.
+ */
+ void resize(size_t count) { this->try_resize(count); }
+
+ /** Increases the buffer capacity to *new_capacity*. */
+ void reserve(size_t new_capacity) { this->try_reserve(new_capacity); }
+
+ // Directly append data into the buffer
+ using detail::buffer<T>::append;
+ template <typename ContiguousRange>
+ void append(const ContiguousRange& range) {
+ append(range.data(), range.data() + range.size());
+ }
};
template <typename T, size_t SIZE, typename Allocator>
}
// Smallest of uint32_t, uint64_t, uint128_t that is large enough to
-// represent all values of T.
+// represent all values of an integral type T.
template <typename T>
using uint32_or_64_or_128_t =
- conditional_t<num_bits<T>() <= 32, uint32_t,
+ conditional_t<num_bits<T>() <= 32 && !FMT_REDUCE_INT_INSTANTIATIONS,
+ uint32_t,
conditional_t<num_bits<T>() <= 64, uint64_t, uint128_t>>;
+// 128-bit integer type used internally; wraps uint128_t when FMT_USE_INT128 is set and a pair of 64-bit words otherwise.
+struct FMT_EXTERN_TEMPLATE_API uint128_wrapper {
+ uint128_wrapper() = default;
+
+#if FMT_USE_INT128
+ uint128_t internal_;
+
+ uint128_wrapper(uint64_t high, uint64_t low) FMT_NOEXCEPT
+ : internal_{static_cast<uint128_t>(low) |
+ (static_cast<uint128_t>(high) << 64)} {}
+
+ uint128_wrapper(uint128_t u) : internal_{u} {}  // This constructor exists only in the FMT_USE_INT128 branch.
+
+ uint64_t high() const FMT_NOEXCEPT { return uint64_t(internal_ >> 64); }  // Upper 64 bits.
+ uint64_t low() const FMT_NOEXCEPT { return uint64_t(internal_); }  // Lower 64 bits.
+
+ uint128_wrapper& operator+=(uint64_t n) FMT_NOEXCEPT {
+ internal_ += n;
+ return *this;
+ }
+#else
+ uint64_t high_;
+ uint64_t low_;
+
+ uint128_wrapper(uint64_t high, uint64_t low) FMT_NOEXCEPT : high_{high},
+ low_{low} {}
+
+ uint64_t high() const FMT_NOEXCEPT { return high_; }
+ uint64_t low() const FMT_NOEXCEPT { return low_; }
+
+ uint128_wrapper& operator+=(uint64_t n) FMT_NOEXCEPT {
+# if defined(_MSC_VER) && defined(_M_X64)
+ unsigned char carry = _addcarry_u64(0, low_, n, &low_);  // Add n to the low word and capture the carry-out.
+ _addcarry_u64(carry, high_, 0, &high_);  // Fold that carry into the high word.
+ return *this;
+# else
+ uint64_t sum = low_ + n;  // Unsigned addition wraps modulo 2^64.
+ high_ += (sum < low_ ? 1 : 0);  // sum < low_ means the addition wrapped, so carry into the high word.
+ low_ = sum;
+ return *this;
+# endif
+ }
+#endif
+};
+
+// Table entry type for the divisibility test used internally (see the divtest_table_for_pow5_* arrays in basic_data).
+template <typename T> struct FMT_EXTERN_TEMPLATE_API divtest_table_entry {
+ T mod_inv;  // NOTE(review): presumably a modular inverse of the divisor - confirm against the table definitions.
+ T max_quotient;  // NOTE(review): presumably the largest quotient that still indicates divisibility - confirm.
+};
+
// Static data is placed in this class template for the header-only config.
template <typename T = void> struct FMT_EXTERN_TEMPLATE_API basic_data {
static const uint64_t powers_of_10_64[];
- static const uint32_t zero_or_powers_of_10_32[];
- static const uint64_t zero_or_powers_of_10_64[];
- static const uint64_t pow10_significands[];
- static const int16_t pow10_exponents[];
+ static const uint32_t zero_or_powers_of_10_32_new[];
+ static const uint64_t zero_or_powers_of_10_64_new[];
+ static const uint64_t grisu_pow10_significands[];
+ static const int16_t grisu_pow10_exponents[];
+ static const divtest_table_entry<uint32_t> divtest_table_for_pow5_32[];
+ static const divtest_table_entry<uint64_t> divtest_table_for_pow5_64[];
+ static const uint64_t dragonbox_pow10_significands_64[];
+ static const uint128_wrapper dragonbox_pow10_significands_128[];
+ // log10(2) = 0x0.4d104d427de7fbcc...
+ static const uint64_t log10_2_significand = 0x4d104d427de7fbcc;
+#if !FMT_USE_FULL_CACHE_DRAGONBOX
+ static const uint64_t powers_of_5_64[];
+ static const uint32_t dragonbox_pow10_recovery_errors[];
+#endif
// GCC generates slightly better code for pairs than chars.
using digit_pair = char[2];
static const digit_pair digits[];
static const char signs[];
static const char left_padding_shifts[5];
static const char right_padding_shifts[5];
+
+ // DEPRECATED! These are for ABI compatibility.
+ static const uint32_t zero_or_powers_of_10_32[];
+ static const uint64_t zero_or_powers_of_10_64[];
};
+// Maps bsr(n) to ceil(log10(pow(2, bsr(n) + 1) - 1)).
+// This is a function instead of an array to work around a bug in GCC10 (#1810).
+FMT_INLINE uint16_t bsr2log10(int bsr) {
+ static constexpr uint16_t data[] = {
+ 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5,
+ 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10,
+ 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
+ 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 19, 20};
+ return data[bsr];  // No bounds check: the table has 64 entries, so bsr must be in [0, 63].
+}
+
#ifndef FMT_EXPORTED
FMT_EXTERN template struct basic_data<void>;
#endif
// Returns the number of decimal digits in n. Leading zeros are not counted
// except for n == 0 in which case count_digits returns 1.
inline int count_digits(uint64_t n) {
- // Based on http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- // and the benchmark https://github.com/localvoid/cxx-benchmark-count-digits.
- int t = (64 - FMT_BUILTIN_CLZLL(n | 1)) * 1233 >> 12;
- return t - (n < data::zero_or_powers_of_10_64[t]) + 1;
+ // https://github.com/fmtlib/format-benchmark/blob/master/digits10
+ auto t = bsr2log10(FMT_BUILTIN_CLZLL(n | 1) ^ 63);
+ return t - (n < data::zero_or_powers_of_10_64_new[t]);
}
#else
// Fallback version of count_digits used when __builtin_clz is not available.
#if FMT_GCC_VERSION || FMT_CLANG_VERSION
# define FMT_ALWAYS_INLINE inline __attribute__((always_inline))
+#elif FMT_MSC_VER
+# define FMT_ALWAYS_INLINE __forceinline
#else
-# define FMT_ALWAYS_INLINE
+# define FMT_ALWAYS_INLINE inline
+#endif
+
+// To suppress unnecessary security cookie checks
+#if FMT_MSC_VER && !FMT_CLANG_VERSION
+# define FMT_SAFEBUFFERS __declspec(safebuffers)
+#else
+# define FMT_SAFEBUFFERS
#endif
#ifdef FMT_BUILTIN_CLZ
// Optional version of count_digits for better performance on 32-bit platforms.
inline int count_digits(uint32_t n) {
- int t = (32 - FMT_BUILTIN_CLZ(n | 1)) * 1233 >> 12;
- return t - (n < data::zero_or_powers_of_10_32[t]) + 1;
+ auto t = bsr2log10(FMT_BUILTIN_CLZ(n | 1) ^ 31);
+ return t - (n < data::zero_or_powers_of_10_32_new[t]);
}
#endif
*dst++ = static_cast<Char>(*src++);
*dst = static_cast<Char>(*src);
}
-inline void copy2(char* dst, const char* src) { memcpy(dst, src, 2); }
+FMT_INLINE void copy2(char* dst, const char* src) { memcpy(dst, src, 2); }
template <typename Iterator> struct format_decimal_result {
Iterator begin;
template <typename Char, typename UInt, typename Iterator,
FMT_ENABLE_IF(!std::is_pointer<remove_cvref_t<Iterator>>::value)>
inline format_decimal_result<Iterator> format_decimal(Iterator out, UInt value,
- int num_digits) {
- // Buffer should be large enough to hold all digits (<= digits10 + 1).
- enum { max_size = digits10<UInt>() + 1 };
- Char buffer[2 * max_size];
- auto end = format_decimal(buffer, value, num_digits).end;
+ int size) {
+ // Buffer is large enough to hold all digits (digits10 + 1).
+ Char buffer[digits10<UInt>() + 1];
+ auto end = format_decimal(buffer, value, size).end;
return {out, detail::copy_str<Char>(buffer, end, out)};
}
template <unsigned BASE_BITS, typename Char, typename It, typename UInt>
inline It format_uint(It out, UInt value, int num_digits, bool upper = false) {
+ if (auto ptr = to_pointer<Char>(out, to_unsigned(num_digits))) {
+ format_uint<BASE_BITS>(ptr, value, num_digits, upper);
+ return out;
+ }
// Buffer should be large enough to hold all digits (digits / BASE_BITS + 1).
char buffer[num_bits<UInt>() / BASE_BITS + 1];
format_uint<BASE_BITS>(buffer, value, num_digits, upper);
template <typename Char> struct fill_t {
private:
enum { max_size = 4 };
- Char data_[max_size];
- unsigned char size_;
+ Char data_[max_size] = {Char(' '), Char(0), Char(0), Char(0)};
+ unsigned char size_ = 1;
public:
FMT_CONSTEXPR void operator=(basic_string_view<Char> s) {
FMT_CONSTEXPR const Char& operator[](size_t index) const {
return data_[index];
}
-
- static FMT_CONSTEXPR fill_t<Char> make() {
- auto fill = fill_t<Char>();
- fill[0] = Char(' ');
- fill.size_ = 1;
- return fill;
- }
};
} // namespace detail
type(0),
align(align::none),
sign(sign::none),
- alt(false),
- fill(detail::fill_t<Char>::make()) {}
+ alt(false) {}
};
using format_specs = basic_format_specs<char>;
namespace detail {
+namespace dragonbox {
+
+// Type-specific constants that Dragonbox uses; specialized below for float and double.
+template <class T> struct float_info;
+
+template <> struct float_info<float> {
+ using carrier_uint = uint32_t;  // unsigned type that write() bit_casts a float into
+ static const int significand_bits = 23;  // exponent_mask() shifts the exponent field left by this amount
+ static const int exponent_bits = 8;  // width of the exponent field built by exponent_mask()
+ static const int min_exponent = -126;
+ static const int max_exponent = 127;
+ static const int exponent_bias = -127;
+ static const int decimal_digits = 9;
+ static const int kappa = 1;  // NOTE(review): this and the constants below look like Dragonbox tuning parameters for to_decimal(), whose definition is not in this chunk - confirm there
+ static const int big_divisor = 100;
+ static const int small_divisor = 10;
+ static const int min_k = -31;
+ static const int max_k = 46;
+ static const int cache_bits = 64;
+ static const int divisibility_check_by_5_threshold = 39;
+ static const int case_fc_pm_half_lower_threshold = -1;
+ static const int case_fc_pm_half_upper_threshold = 6;
+ static const int case_fc_lower_threshold = -2;
+ static const int case_fc_upper_threshold = 6;
+ static const int case_shorter_interval_left_endpoint_lower_threshold = 2;
+ static const int case_shorter_interval_left_endpoint_upper_threshold = 3;
+ static const int shorter_interval_tie_lower_threshold = -35;
+ static const int shorter_interval_tie_upper_threshold = -35;
+ static const int max_trailing_zeros = 7;
+};
+
+template <> struct float_info<double> {
+ using carrier_uint = uint64_t;  // unsigned type that write() bit_casts a double into
+ static const int significand_bits = 52;  // exponent_mask() shifts the exponent field left by this amount
+ static const int exponent_bits = 11;  // width of the exponent field built by exponent_mask()
+ static const int min_exponent = -1022;
+ static const int max_exponent = 1023;
+ static const int exponent_bias = -1023;
+ static const int decimal_digits = 17;
+ static const int kappa = 2;  // NOTE(review): this and the constants below look like Dragonbox tuning parameters for to_decimal(), whose definition is not in this chunk - confirm there
+ static const int big_divisor = 1000;
+ static const int small_divisor = 100;
+ static const int min_k = -292;
+ static const int max_k = 326;
+ static const int cache_bits = 128;
+ static const int divisibility_check_by_5_threshold = 86;
+ static const int case_fc_pm_half_lower_threshold = -2;
+ static const int case_fc_pm_half_upper_threshold = 9;
+ static const int case_fc_lower_threshold = -4;
+ static const int case_fc_upper_threshold = 9;
+ static const int case_shorter_interval_left_endpoint_lower_threshold = 2;
+ static const int case_shorter_interval_left_endpoint_upper_threshold = 3;
+ static const int shorter_interval_tie_lower_threshold = -77;
+ static const int shorter_interval_tie_upper_threshold = -77;
+ static const int max_trailing_zeros = 16;
+};
+
+template <typename T> struct decimal_fp {  // decimal result type of to_decimal(); write_float() consumes it like big_decimal_fp
+ using significand_type = typename float_info<T>::carrier_uint;
+ significand_type significand;  // decimal digits held as an unsigned integer
+ int exponent;  // write_float() adds this to the digit count to place the decimal point
+};
+
+template <typename T> FMT_API decimal_fp<T> to_decimal(T x) FMT_NOEXCEPT;  // declaration only; defined outside this chunk
+} // namespace dragonbox
+
+template <typename T>
+constexpr typename dragonbox::float_info<T>::carrier_uint exponent_mask() {  // mask selecting the exponent field of T's bit pattern
+ using uint = typename dragonbox::float_info<T>::carrier_uint;
+ return ((uint(1) << dragonbox::float_info<T>::exponent_bits) - 1)  // exponent_bits consecutive one bits...
+ << dragonbox::float_info<T>::significand_bits;  // ...shifted just above the significand field
+}
// A floating-point presentation format.
enum class float_format : unsigned char {
return it;
}
-template <typename Char> class float_writer {
- private:
- // The number is given as v = digits_ * pow(10, exp_).
- const char* digits_;
- int num_digits_;
- int exp_;
- size_t size_;
- float_specs specs_;
- Char decimal_point_;
-
- template <typename It> It prettify(It it) const {
- // pow(10, full_exp - 1) <= v <= pow(10, full_exp).
- int full_exp = num_digits_ + exp_;
- if (specs_.format == float_format::exp) {
- // Insert a decimal point after the first digit and add an exponent.
- *it++ = static_cast<Char>(*digits_);
- int num_zeros = specs_.precision - num_digits_;
- if (num_digits_ > 1 || specs_.showpoint) *it++ = decimal_point_;
- it = copy_str<Char>(digits_ + 1, digits_ + num_digits_, it);
- if (num_zeros > 0 && specs_.showpoint)
- it = std::fill_n(it, num_zeros, static_cast<Char>('0'));
- *it++ = static_cast<Char>(specs_.upper ? 'E' : 'e');
- return write_exponent<Char>(full_exp - 1, it);
- }
- if (num_digits_ <= full_exp) {
- // 1234e7 -> 12340000000[.0+]
- it = copy_str<Char>(digits_, digits_ + num_digits_, it);
- it = std::fill_n(it, full_exp - num_digits_, static_cast<Char>('0'));
- if (specs_.showpoint || specs_.precision < 0) {
- *it++ = decimal_point_;
- int num_zeros = specs_.precision - full_exp;
- if (num_zeros <= 0) {
- if (specs_.format != float_format::fixed)
- *it++ = static_cast<Char>('0');
- return it;
- }
-#ifdef FMT_FUZZ
- if (num_zeros > 5000)
- throw std::runtime_error("fuzz mode - avoiding excessive cpu use");
-#endif
- it = std::fill_n(it, num_zeros, static_cast<Char>('0'));
- }
- } else if (full_exp > 0) {
- // 1234e-2 -> 12.34[0+]
- it = copy_str<Char>(digits_, digits_ + full_exp, it);
- if (!specs_.showpoint) {
- // Remove trailing zeros.
- int num_digits = num_digits_;
- while (num_digits > full_exp && digits_[num_digits - 1] == '0')
- --num_digits;
- if (num_digits != full_exp) *it++ = decimal_point_;
- return copy_str<Char>(digits_ + full_exp, digits_ + num_digits, it);
- }
- *it++ = decimal_point_;
- it = copy_str<Char>(digits_ + full_exp, digits_ + num_digits_, it);
- if (specs_.precision > num_digits_) {
- // Add trailing zeros.
- int num_zeros = specs_.precision - num_digits_;
- it = std::fill_n(it, num_zeros, static_cast<Char>('0'));
- }
- } else {
- // 1234e-6 -> 0.001234
- *it++ = static_cast<Char>('0');
- int num_zeros = -full_exp;
- int num_digits = num_digits_;
- if (num_digits == 0 && specs_.precision >= 0 &&
- specs_.precision < num_zeros) {
- num_zeros = specs_.precision;
- }
- // Remove trailing zeros.
- if (!specs_.showpoint)
- while (num_digits > 0 && digits_[num_digits - 1] == '0') --num_digits;
- if (num_zeros != 0 || num_digits != 0 || specs_.showpoint) {
- *it++ = decimal_point_;
- it = std::fill_n(it, num_zeros, static_cast<Char>('0'));
- it = copy_str<Char>(digits_, digits_ + num_digits, it);
- }
- }
- return it;
- }
-
- public:
- float_writer(const char* digits, int num_digits, int exp, float_specs specs,
- Char decimal_point)
- : digits_(digits),
- num_digits_(num_digits),
- exp_(exp),
- specs_(specs),
- decimal_point_(decimal_point) {
- int full_exp = num_digits + exp - 1;
- int precision = specs.precision > 0 ? specs.precision : 16;
- if (specs_.format == float_format::general &&
- !(full_exp >= -4 && full_exp < precision)) {
- specs_.format = float_format::exp;
- }
- size_ = prettify(counting_iterator()).count();
- size_ += specs.sign ? 1 : 0;
- }
-
- size_t size() const { return size_; }
-
- template <typename It> It operator()(It it) const {
- if (specs_.sign) *it++ = static_cast<Char>(data::signs[specs_.sign]);
- return prettify(it);
- }
-};
-
template <typename T>
int format_float(T value, int precision, float_specs specs, buffer<char>& buf);
typename F>
inline OutputIt write_padded(OutputIt out,
const basic_format_specs<Char>& specs, size_t size,
- size_t width, const F& f) {
+ size_t width, F&& f) {
static_assert(align == align::left || align == align::right, "");
unsigned spec_width = to_unsigned(specs.width);
size_t padding = spec_width > width ? spec_width - width : 0;
typename F>
inline OutputIt write_padded(OutputIt out,
const basic_format_specs<Char>& specs, size_t size,
- const F& f) {
+ F&& f) {
return write_padded<align>(out, specs, size, size, f);
}
char digits[40];
format_decimal(digits, abs_value, num_digits);
basic_memory_buffer<Char> buffer;
- size += prefix_size;
- buffer.resize(size);
+ size += static_cast<int>(prefix_size);
+ const auto usize = to_unsigned(size);
+ buffer.resize(usize);
basic_string_view<Char> s(&sep, sep_size);
// Index of a decimal digit with the least significant digit having index 0.
int digit_index = 0;
group = groups.cbegin();
- auto p = buffer.data() + size;
- for (int i = num_digits - 1; i >= 0; --i) {
- *--p = static_cast<Char>(digits[i]);
+ auto p = buffer.data() + size - 1;
+ for (int i = num_digits - 1; i > 0; --i) {
+ *p-- = static_cast<Char>(digits[i]);
if (*group <= 0 || ++digit_index % *group != 0 ||
*group == max_value<char>())
continue;
digit_index = 0;
++group;
}
- p -= s.size();
std::uninitialized_copy(s.data(), s.data() + s.size(),
make_checked(p, s.size()));
+ p -= s.size();
}
- if (prefix_size != 0) p[-1] = static_cast<Char>('-');
- using iterator = remove_reference_t<decltype(reserve(out, 0))>;
+ *p-- = static_cast<Char>(*digits);
+ if (prefix_size != 0) *p = static_cast<Char>('-');
auto data = buffer.data();
- out = write_padded<align::right>(out, specs, size, size, [=](iterator it) {
- return copy_str<Char>(data, data + size, it);
- });
+ out = write_padded<align::right>(
+ out, specs, usize, usize,
+ [=](iterator it) { return copy_str<Char>(data, data + size, it); });
}
void on_chr() { *out++ = static_cast<Char>(abs_value); }
});
}
+// A decimal floating-point number significand * pow(10, exp).
+struct big_decimal_fp {
+ const char* significand;  // digit characters; not owned (write() points it at a local buffer)
+ int significand_size;  // number of digit characters in significand
+ int exponent;  // power of ten applied to the digits
+};
+
+inline int get_significand_size(const big_decimal_fp& fp) {  // digit count is stored explicitly for the string form
+ return fp.significand_size;
+}
+template <typename T>
+inline int get_significand_size(const dragonbox::decimal_fp<T>& fp) {  // digit count is computed for the integer form
+ return count_digits(fp.significand);
+}
+
+template <typename Char, typename OutputIt>
+inline OutputIt write_significand(OutputIt out, const char* significand,  // digit-string form: copies the digits to out unchanged
+ int& significand_size) {  // NOTE(review): taken by non-const reference although it is only read here - confirm whether this is needed for overload selection
+ return copy_str<Char>(significand, significand + significand_size, out);
+}
+template <typename Char, typename OutputIt, typename UInt>
+inline OutputIt write_significand(OutputIt out, UInt significand,  // integer form: formats the value as decimal digits
+ int significand_size) {
+ return format_decimal<Char>(out, significand, significand_size).end;  // .end is the position after the last digit written
+}
+
+template <typename Char, typename UInt,
+ FMT_ENABLE_IF(std::is_integral<UInt>::value)>
+inline Char* write_significand(Char* out, UInt significand,  // writes the digits with decimal_point after the first integral_size digits
+ int significand_size, int integral_size,
+ Char decimal_point) {
+ if (!decimal_point)  // a null decimal_point means: digits only
+ return format_decimal(out, significand, significand_size).end;
+ auto end = format_decimal(out + 1, significand, significand_size).end;  // format one slot to the right, leaving out[0] free
+ if (integral_size == 1)
+ out[0] = out[1];  // single integral digit: move it directly
+ else
+ std::copy_n(out + 1, integral_size, out);  // shift the integral digits left by one
+ out[integral_size] = decimal_point;  // the slot vacated by the shift receives the point
+ return end;  // one past the last fractional digit
+}
+
+template <typename OutputIt, typename UInt, typename Char,
+ FMT_ENABLE_IF(!std::is_pointer<remove_cvref_t<OutputIt>>::value)>
+inline OutputIt write_significand(OutputIt out, UInt significand,  // non-pointer iterators: build in a local buffer, then copy to out
+ int significand_size, int integral_size,
+ Char decimal_point) {
+ // Buffer is large enough to hold digits (digits10 + 1) and a decimal point.
+ Char buffer[digits10<UInt>() + 2];
+ auto end = write_significand(buffer, significand, significand_size,  // buffer is a Char*, which this overload's enable_if excludes
+ integral_size, decimal_point);
+ return detail::copy_str<Char>(buffer, end, out);
+}
+
+template <typename OutputIt, typename Char>
+inline OutputIt write_significand(OutputIt out, const char* significand,  // digit-string form with an optional decimal point
+ int significand_size, int integral_size,
+ Char decimal_point) {
+ out = detail::copy_str<Char>(significand, significand + integral_size, out);  // integral digits first
+ if (!decimal_point) return out;  // a null decimal_point stops after the integral digits
+ *out++ = decimal_point;
+ return detail::copy_str<Char>(significand + integral_size,  // then the remaining (fractional) digits
+ significand + significand_size, out);
+}
+
+// Writes the decimal floating-point number fp (significand * pow(10, exponent))
+// to out and returns the advanced iterator.
+//
+//   fp            - big_decimal_fp or dragonbox::decimal_fp<T>; only its
+//                   significand and exponent members are used.
+//   specs         - width/fill/alignment used for padding.
+//   fspecs        - format (general/exp/fixed), precision, sign, showpoint
+//                   and upper-case flags.
+//   decimal_point - character written between integral and fractional digits.
+//
+// Every branch computes `size`, the exact number of characters its writer
+// emits, because write_padded() and reserve() derive the padding and the
+// reserved space from it. Keep `size` and the writer lambdas in sync.
+template <typename OutputIt, typename DecimalFP, typename Char>
+OutputIt write_float(OutputIt out, const DecimalFP& fp,
+                     const basic_format_specs<Char>& specs, float_specs fspecs,
+                     Char decimal_point) {
+  auto significand = fp.significand;
+  int significand_size = get_significand_size(fp);
+  static const Char zero = static_cast<Char>('0');
+  auto sign = fspecs.sign;
+  // Start with the digits and the optional sign; branches add the rest.
+  size_t size = to_unsigned(significand_size) + (sign ? 1 : 0);
+  using iterator = remove_reference_t<decltype(reserve(out, 0))>;
+
+  // Exponent of the number when written as d.ddd * pow(10, output_exp).
+  int output_exp = fp.exponent + significand_size - 1;
+  auto use_exp_format = [=]() {
+    if (fspecs.format == float_format::exp) return true;
+    if (fspecs.format != float_format::general) return false;
+    // Use the fixed notation if the exponent is in [exp_lower, exp_upper),
+    // e.g. 0.0001 instead of 1e-04. Otherwise use the exponent notation.
+    const int exp_lower = -4, exp_upper = 16;
+    return output_exp < exp_lower ||
+           output_exp >= (fspecs.precision > 0 ? fspecs.precision : exp_upper);
+  };
+  if (use_exp_format()) {
+    int num_zeros = 0;
+    if (fspecs.showpoint) {
+      // Pad the fraction with zeros up to the requested precision.
+      num_zeros = (std::max)(fspecs.precision - significand_size, 0);
+      size += to_unsigned(num_zeros);
+    } else if (significand_size == 1) {
+      // A single digit without showpoint is written without a point: 1e+05.
+      decimal_point = Char();
+    }
+    auto abs_output_exp = output_exp >= 0 ? output_exp : -output_exp;
+    int exp_digits = 2;
+    if (abs_output_exp >= 100) exp_digits = abs_output_exp >= 1000 ? 4 : 3;
+
+    // Optional point + exponent character + exponent sign + exponent digits.
+    size += to_unsigned((decimal_point ? 1 : 0) + 2 + exp_digits);
+    char exp_char = fspecs.upper ? 'E' : 'e';
+    auto write = [=](iterator it) {
+      if (sign) *it++ = static_cast<Char>(data::signs[sign]);
+      // Insert a decimal point after the first digit and add an exponent.
+      it = write_significand(it, significand, significand_size, 1,
+                             decimal_point);
+      if (num_zeros > 0) it = std::fill_n(it, num_zeros, zero);
+      *it++ = static_cast<Char>(exp_char);
+      return write_exponent<Char>(output_exp, it);
+    };
+    // Skip the padding machinery when no width was requested.
+    return specs.width > 0 ? write_padded<align::right>(out, specs, size, write)
+                           : base_iterator(out, write(reserve(out, size)));
+  }
+
+  // Number of digits before the decimal point in fixed notation.
+  int exp = fp.exponent + significand_size;
+  if (fp.exponent >= 0) {
+    // 1234e5 -> 123400000[.0+]
+    size += to_unsigned(fp.exponent);
+    int num_zeros = fspecs.precision - exp;
+#ifdef FMT_FUZZ
+    if (num_zeros > 5000)
+      throw std::runtime_error("fuzz mode - avoiding excessive cpu use");
+#endif
+    if (fspecs.showpoint) {
+      // The writer below emits the decimal point whenever showpoint is set,
+      // even when no zeros follow it, so it must always be counted.
+      ++size;
+      if (num_zeros <= 0 && fspecs.format != float_format::fixed) num_zeros = 1;
+      if (num_zeros > 0) size += to_unsigned(num_zeros);
+    }
+    return write_padded<align::right>(out, specs, size, [&](iterator it) {
+      if (sign) *it++ = static_cast<Char>(data::signs[sign]);
+      it = write_significand<Char>(it, significand, significand_size);
+      it = std::fill_n(it, fp.exponent, zero);
+      if (!fspecs.showpoint) return it;
+      *it++ = decimal_point;
+      return num_zeros > 0 ? std::fill_n(it, num_zeros, zero) : it;
+    });
+  } else if (exp > 0) {
+    // 1234e-2 -> 12.34[0+]
+    int num_zeros = fspecs.showpoint ? fspecs.precision - significand_size : 0;
+    // One character for the decimal point plus any trailing zeros.
+    size += 1 + to_unsigned(num_zeros > 0 ? num_zeros : 0);
+    return write_padded<align::right>(out, specs, size, [&](iterator it) {
+      if (sign) *it++ = static_cast<Char>(data::signs[sign]);
+      it = write_significand(it, significand, significand_size, exp,
+                             decimal_point);
+      return num_zeros > 0 ? std::fill_n(it, num_zeros, zero) : it;
+    });
+  }
+  // 1234e-6 -> 0.001234
+  int num_zeros = -exp;
+  if (significand_size == 0 && fspecs.precision >= 0 &&
+      fspecs.precision < num_zeros) {
+    // No digits at all: emit at most `precision` zeros after the point.
+    num_zeros = fspecs.precision;
+  }
+  // The point is omitted when nothing would follow it and showpoint is off;
+  // in that case only the leading zero is written and counted.
+  const bool pointy =
+      num_zeros != 0 || significand_size != 0 || fspecs.showpoint;
+  size += 1 + (pointy ? 1 : 0) + to_unsigned(num_zeros);
+  return write_padded<align::right>(out, specs, size, [&](iterator it) {
+    if (sign) *it++ = static_cast<Char>(data::signs[sign]);
+    *it++ = zero;
+    if (!pointy) return it;
+    *it++ = decimal_point;
+    it = std::fill_n(it, num_zeros, zero);
+    return write_significand<Char>(it, significand, significand_size);
+  });
+}
+
+
template <typename Char, typename OutputIt, typename T,
FMT_ENABLE_IF(std::is_floating_point<T>::value)>
OutputIt write(OutputIt out, T value, basic_format_specs<Char> specs,
++precision;
}
if (const_check(std::is_same<T, float>())) fspecs.binary32 = true;
- fspecs.use_grisu = use_grisu<T>();
+ fspecs.use_grisu = is_fast_float<T>();
int exp = format_float(promote_float(value), precision, fspecs, buffer);
fspecs.precision = precision;
Char point =
fspecs.locale ? decimal_point<Char>(loc) : static_cast<Char>('.');
- float_writer<Char> w(buffer.data(), static_cast<int>(buffer.size()), exp,
- fspecs, point);
- return write_padded<align::right>(out, specs, w.size(), w);
+ auto fp = big_decimal_fp{buffer.data(), static_cast<int>(buffer.size()), exp};
+ return write_float(out, fp, specs, fspecs, point);
}
template <typename Char, typename OutputIt, typename T,
- FMT_ENABLE_IF(std::is_floating_point<T>::value)>
+ FMT_ENABLE_IF(is_fast_float<T>::value)>
OutputIt write(OutputIt out, T value) {
if (const_check(!is_supported_floating_point(value))) return out;
+
+ using floaty = conditional_t<std::is_same<T, long double>::value, double, T>;  // long double is handled as double
+ using uint = typename dragonbox::float_info<floaty>::carrier_uint;
+ auto bits = bit_cast<uint>(value);  // raw bit pattern, taken before value is negated below
+
auto fspecs = float_specs();
- if (std::signbit(value)) { // value < 0 is false for NaN so use signbit.
+ auto sign_bit = bits & (uint(1) << (num_bits<uint>() - 1));  // topmost bit of the carrier type
+ if (sign_bit != 0) {
fspecs.sign = sign::minus;
value = -value;
}
- auto specs = basic_format_specs<Char>();
- if (!std::isfinite(value))
+ static const auto specs = basic_format_specs<Char>();  // default specs, created once
+ uint mask = exponent_mask<floaty>();
+ if ((bits & mask) == mask)  // every exponent bit set: hand off to write_nonfinite (inf or nan)
return write_nonfinite(out, std::isinf(value), specs, fspecs);
- memory_buffer buffer;
- int precision = -1;
- if (const_check(std::is_same<T, float>())) fspecs.binary32 = true;
- fspecs.use_grisu = use_grisu<T>();
- int exp = format_float(promote_float(value), precision, fspecs, buffer);
- fspecs.precision = precision;
- float_writer<Char> w(buffer.data(), static_cast<int>(buffer.size()), exp,
- fspecs, static_cast<Char>('.'));
- return base_iterator(out, w(reserve(out, w.size())));
+ auto dec = dragonbox::to_decimal(static_cast<floaty>(value));  // decimal significand/exponent pair
+ return write_float(out, dec, specs, fspecs, static_cast<Char>('.'));  // always '.', no locale on this path
+}
+
+template <typename Char, typename OutputIt, typename T,
+ FMT_ENABLE_IF(std::is_floating_point<T>::value &&
+ !is_fast_float<T>::value)>  // floating-point types not covered by the is_fast_float overload
+inline OutputIt write(OutputIt out, T value) {
+ return write(out, value, basic_format_specs<Char>());  // delegate to the specs-taking overload with default specs
}
template <typename Char, typename OutputIt>
return base_iterator(out, it);
}
+template <typename Char>
+buffer_appender<Char> write(buffer_appender<Char> out,  // string overload for buffer_appender outputs
+ basic_string_view<Char> value) {
+ get_container(out).append(value.begin(), value.end());  // append the whole range to the underlying container in one call
+ return out;
+}
+
template <typename Char, typename OutputIt, typename T,
FMT_ENABLE_IF(is_integral<T>::value &&
!std::is_same<T, bool>::value &&
// Don't do -abs_value since it trips unsigned-integer-overflow sanitizer.
if (negative) abs_value = ~abs_value + 1;
int num_digits = count_digits(abs_value);
- auto it = reserve(out, (negative ? 1 : 0) + static_cast<size_t>(num_digits));
+ auto size = (negative ? 1 : 0) + static_cast<size_t>(num_digits);
+ auto it = reserve(out, size);
+ if (auto ptr = to_pointer<Char>(it, size)) {
+ if (negative) *ptr++ = static_cast<Char>('-');
+ format_decimal<Char>(ptr, abs_value, num_digits);
+ return out;
+ }
if (negative) *it++ = static_cast<Char>('-');
it = format_decimal<Char>(it, abs_value, num_digits).end;
return base_iterator(out, it);
mapped_type_constant<T, basic_format_context<OutputIt, Char>>::value ==
type::custom_type,
OutputIt>::type {
- basic_format_context<OutputIt, Char> ctx(out, {}, {});
- return formatter<T>().format(value, ctx);
+ using context_type = basic_format_context<OutputIt, Char>;
+ using formatter_type =
+ conditional_t<has_formatter<T, context_type>::value,
+ typename context_type::template formatter_type<T>,
+ fallback_formatter<T, Char>>;
+ context_type ctx(out, {}, {});
+ return formatter_type().format(value, ctx);
}
// An argument visitor that formats the argument and writes it via the output
}
};
+/** The default argument formatter. */
+template <typename OutputIt, typename Char>
+class arg_formatter : public arg_formatter_base<OutputIt, Char> {
+ private:
+ using char_type = Char;
+ using base = arg_formatter_base<OutputIt, Char>;
+ using context_type = basic_format_context<OutputIt, Char>;
+
+ context_type& ctx_;  // formatting context; not owned
+ basic_format_parse_context<char_type>* parse_ctx_;  // may be null by default, but is dereferenced when a custom-type handle is formatted
+ const Char* ptr_;  // if non-null, parse_ctx_ is advanced to this position before a custom type is formatted
+
+ public:
+ using iterator = typename base::iterator;
+ using format_specs = typename base::format_specs;
+
+ /**
+ \rst
+ Constructs an argument formatter object.
+ *ctx* is a reference to the formatting context,
+ *specs* contains format specifier information for standard argument types.
+ \endrst
+ */
+ explicit arg_formatter(
+ context_type& ctx,
+ basic_format_parse_context<char_type>* parse_ctx = nullptr,
+ format_specs* specs = nullptr, const Char* ptr = nullptr)
+ : base(ctx.out(), specs, ctx.locale()),  // the base class receives the output iterator, specs and locale
+ ctx_(ctx),
+ parse_ctx_(parse_ctx),
+ ptr_(ptr) {}
+
+ using base::operator();  // keep the base-class overloads visible next to the handle overload below
+
+ /** Formats an argument of a user-defined type. */
+ iterator operator()(typename basic_format_arg<context_type>::handle handle) {
+ if (ptr_) advance_to(*parse_ctx_, ptr_);
+ handle.format(*parse_ctx_, ctx_);  // the handle formats through the parse and format contexts
+ return ctx_.out();  // return the context's output iterator after formatting
+ }
+};
+
template <typename Char> FMT_CONSTEXPR bool is_name_start(Char c) {
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || '_' == c;
}
Context& ctx)
: parse_ctx_(parse_ctx), ctx_(ctx) {}
- bool operator()(typename basic_format_arg<Context>::handle h) const {
+ void operator()(typename basic_format_arg<Context>::handle h) const {
h.format(parse_ctx_, ctx_);
- return true;
}
- template <typename T> bool operator()(T) const { return false; }
+ template <typename T> void operator()(T) const {}
};
template <typename T>
};
template <typename Char>
-FMT_CONSTEXPR const Char* next_code_point(const Char* begin, const Char* end) {
- if (const_check(sizeof(Char) != 1) || (*begin & 0x80) == 0) return begin + 1;
- do {
- ++begin;
- } while (begin != end && (*begin & 0xc0) == 0x80);
- return begin;
+FMT_CONSTEXPR int code_point_length(const Char* begin) {
+ if (const_check(sizeof(Char) != 1)) return 1;  // only one-byte Char types go through the table
+ constexpr char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0};
+ int len = lengths[static_cast<unsigned char>(*begin) >> 3];  // index by the top five bits of the first byte
+
+ // A table entry of 0 (presumably a byte that cannot start a sequence) is
+ // reported as length 1: len + !len maps 0 to 1 and leaves other lengths
+ // unchanged, so the result is never 0.
+ return len + !len;
+}
+
+template <typename Char> constexpr bool is_ascii_letter(Char c) {  // true only for 'a'..'z' and 'A'..'Z'
+ return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+}
+
+// Converts a character to ASCII. Returns a number > 127 on conversion failure.
+template <typename Char, FMT_ENABLE_IF(std::is_integral<Char>::value)>
+constexpr Char to_ascii(Char value) {  // integral character types are returned unchanged
+ return value;
+}
+template <typename Char, FMT_ENABLE_IF(std::is_enum<Char>::value)>
+constexpr typename std::underlying_type<Char>::type to_ascii(Char value) {  // enum character types are returned as their underlying integer type
+ return value;
}
// Parses fill and alignment.
Handler&& handler) {
FMT_ASSERT(begin != end, "");
auto align = align::none;
- auto p = next_code_point(begin, end);
- if (p == end) p = begin;
+ auto p = begin + code_point_length(begin);
+ if (p >= end) p = begin;
for (;;) {
- switch (static_cast<char>(*p)) {
+ switch (to_ascii(*p)) {
case '<':
align = align::left;
break;
template <typename Char, typename SpecHandler>
FMT_CONSTEXPR const Char* parse_format_specs(const Char* begin, const Char* end,
SpecHandler&& handler) {
- if (begin == end || *begin == '}') return begin;
+ if (begin == end) return begin;
begin = parse_align(begin, end, handler);
if (begin == end) return begin;
// Parse sign.
- switch (static_cast<char>(*begin)) {
+ switch (to_ascii(*begin)) {
case '+':
handler.on_plus();
++begin;
Handler&& handler) {
++begin;
if (begin == end) return handler.on_error("invalid format string"), end;
- if (static_cast<char>(*begin) == '}') {
+ if (*begin == '}') {
handler.on_replacement_field(handler.on_arg_id(), begin);
} else if (*begin == '{') {
handler.on_text(begin, begin + 1);
return;
}
struct writer {
- FMT_CONSTEXPR void operator()(const Char* begin, const Char* end) {
- if (begin == end) return;
+ FMT_CONSTEXPR void operator()(const Char* pbegin, const Char* pend) {
+ if (pbegin == pend) return;
for (;;) {
const Char* p = nullptr;
- if (!find<IS_CONSTEXPR>(begin, end, '}', p))
- return handler_.on_text(begin, end);
+ if (!find<IS_CONSTEXPR>(pbegin, pend, '}', p))
+ return handler_.on_text(pbegin, pend);
++p;
- if (p == end || *p != '}')
+ if (p == pend || *p != '}')
return handler_.on_error("unmatched '}' in format string");
- handler_.on_text(begin, p);
- begin = p + 1;
+ handler_.on_text(pbegin, p);
+ pbegin = p + 1;
}
}
Handler& handler_;
return f.parse(ctx);
}
-template <typename ArgFormatter, typename Char, typename Context>
+template <typename OutputIt, typename Char, typename Context>
struct format_handler : detail::error_handler {
basic_format_parse_context<Char> parse_context;
Context context;
- format_handler(typename ArgFormatter::iterator out,
- basic_string_view<Char> str,
+ format_handler(OutputIt out, basic_string_view<Char> str,
basic_format_args<Context> format_args, detail::locale_ref loc)
: parse_context(str), context(out, format_args, loc) {}
FMT_INLINE void on_replacement_field(int id, const Char*) {
auto arg = get_arg(context, id);
context.advance_to(visit_format_arg(
- default_arg_formatter<typename ArgFormatter::iterator, Char>{
- context.out(), context.args(), context.locale()},
+ default_arg_formatter<OutputIt, Char>{context.out(), context.args(),
+ context.locale()},
arg));
}
const Char* on_format_specs(int id, const Char* begin, const Char* end) {
- advance_to(parse_context, begin);
auto arg = get_arg(context, id);
- custom_formatter<Context> f(parse_context, context);
- if (visit_format_arg(f, arg)) return parse_context.begin();
- basic_format_specs<Char> specs;
- using parse_context_t = basic_format_parse_context<Char>;
- specs_checker<specs_handler<parse_context_t, Context>> handler(
- specs_handler<parse_context_t, Context>(specs, parse_context, context),
- arg.type());
- begin = parse_format_specs(begin, end, handler);
- if (begin == end || *begin != '}') on_error("missing '}' in format string");
- advance_to(parse_context, begin);
- context.advance_to(
- visit_format_arg(ArgFormatter(context, &parse_context, &specs), arg));
+ if (arg.type() == type::custom_type) {  // custom types parse and format their own specs
+ advance_to(parse_context, begin);
+ visit_format_arg(custom_formatter<Context>(parse_context, context), arg);
+ return parse_context.begin();  // position the custom formatter left the parse context at
+ }
+ auto specs = basic_format_specs<Char>();
+ if (begin + 1 < end && begin[1] == '}' && is_ascii_letter(*begin)) {  // shortcut: the spec is one ASCII letter directly followed by '}'
+ specs.type = static_cast<char>(*begin++);  // record the letter as the type and step past it
+ } else {
+ using parse_context_t = basic_format_parse_context<Char>;
+ specs_checker<specs_handler<parse_context_t, Context>> handler(
+ specs_handler<parse_context_t, Context>(specs, parse_context,
+ context),
+ arg.type());
+ begin = parse_format_specs(begin, end, handler);  // general path: run the full spec parser
+ if (begin == end || *begin != '}')
+ on_error("missing '}' in format string");
+ }
+ context.advance_to(visit_format_arg(
+ arg_formatter<OutputIt, Char>(context, &parse_context, &specs), arg));  // format the argument with the collected specs
return begin;
}
};
FMT_API void report_error(format_func func, int error_code,
string_view message) FMT_NOEXCEPT;
-
-/** The default argument formatter. */
-template <typename OutputIt, typename Char>
-class arg_formatter : public arg_formatter_base<OutputIt, Char> {
- private:
- using char_type = Char;
- using base = arg_formatter_base<OutputIt, Char>;
- using context_type = basic_format_context<OutputIt, Char>;
-
- context_type& ctx_;
- basic_format_parse_context<char_type>* parse_ctx_;
- const Char* ptr_;
-
- public:
- using iterator = typename base::iterator;
- using format_specs = typename base::format_specs;
-
- /**
- \rst
- Constructs an argument formatter object.
- *ctx* is a reference to the formatting context,
- *specs* contains format specifier information for standard argument types.
- \endrst
- */
- explicit arg_formatter(
- context_type& ctx,
- basic_format_parse_context<char_type>* parse_ctx = nullptr,
- format_specs* specs = nullptr, const Char* ptr = nullptr)
- : base(ctx.out(), specs, ctx.locale()),
- ctx_(ctx),
- parse_ctx_(parse_ctx),
- ptr_(ptr) {}
-
- using base::operator();
-
- /** Formats an argument of a user-defined type. */
- iterator operator()(typename basic_format_arg<context_type>::handle handle) {
- if (ptr_) advance_to(*parse_ctx_, ptr_);
- handle.format(*parse_ctx_, ctx_);
- return ctx_.out();
- }
-};
} // namespace detail
template <typename OutputIt, typename Char>
using arg_formatter FMT_DEPRECATED_ALIAS =
- detail::arg_formatter<OutputIt, Char>;
+ detail::arg_formatter<OutputIt, Char>;
/**
An error returned by an operating system or a language runtime,
// using variant = std::variant<int, std::string>;
// template <>
// struct formatter<variant>: dynamic_formatter<> {
-// void format(buffer &buf, const variant &v, context &ctx) {
-// visit([&](const auto &val) { format(buf, val, ctx); }, v);
+// auto format(const variant& v, format_context& ctx) {
+// return visit([&](const auto& val) {
+// return dynamic_formatter<>::format(val, ctx);
+// }, v);
// }
// };
template <typename Char = char> class dynamic_formatter {
ctx.advance_to(ctx.begin() + (p - &*ctx.begin()));
}
-/** Formats arguments and writes the output to the range. */
-template <typename ArgFormatter, typename Char, typename Context>
-typename Context::iterator vformat_to(
- typename ArgFormatter::iterator out, basic_string_view<Char> format_str,
- basic_format_args<Context> args,
- detail::locale_ref loc = detail::locale_ref()) {
- if (format_str.size() == 2 && detail::equal2(format_str.data(), "{}")) {
- auto arg = args.get(0);
- if (!arg) detail::error_handler().on_error("argument not found");
- using iterator = typename ArgFormatter::iterator;
- return visit_format_arg(
- detail::default_arg_formatter<iterator, Char>{out, args, loc}, arg);
- }
- detail::format_handler<ArgFormatter, Char, Context> h(out, format_str, args,
- loc);
- detail::parse_format_string<false>(format_str, h);
- return h.context.out();
-}
+/**
+ \rst
+ Converts ``p`` to ``const void*`` for pointer formatting.
-// Casts ``p`` to ``const void*`` for pointer formatting.
-// Example:
-// auto s = format("{}", ptr(p));
+ **Example**::
+
+ auto s = fmt::format("{}", fmt::ptr(p));
+ \endrst
+ */
template <typename T> inline const void* ptr(const T* p) { return p; }
template <typename T> inline const void* ptr(const std::unique_ptr<T>& p) {
return p.get();
};
template <> struct formatter<bytes> {
+ private:
+ detail::dynamic_format_specs<char> specs_;
+
+ public:
template <typename ParseContext>
FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) {
using handler_type = detail::dynamic_specs_handler<ParseContext>;
specs_.precision, specs_.precision_ref, ctx);
return detail::write_bytes(ctx.out(), b.data_, specs_);
}
-
- private:
- detail::dynamic_format_specs<char> specs_;
};
template <typename It, typename Sentinel, typename Char>
\endrst
*/
template <typename Range>
-arg_join<detail::iterator_t<const Range>, detail::sentinel_t<const Range>, char>
-join(const Range& range, string_view sep) {
+arg_join<detail::iterator_t<Range>, detail::sentinel_t<Range>, char> join(
+ Range&& range, string_view sep) {
return join(std::begin(range), std::end(range), sep);
}
template <typename Range>
-arg_join<detail::iterator_t<const Range>, detail::sentinel_t<const Range>,
- wchar_t>
-join(const Range& range, wstring_view sep) {
+arg_join<detail::iterator_t<Range>, detail::sentinel_t<Range>, wchar_t> join(
+ Range&& range, wstring_view sep) {
return join(std::begin(range), std::end(range), sep);
}
// The buffer should be large enough to store the number including the sign or
// "false" for bool.
constexpr int max_size = detail::digits10<T>() + 2;
- char buffer[max_size > 5 ? max_size : 5];
+ char buffer[max_size > 5 ? static_cast<unsigned>(max_size) : 5];
char* begin = buffer;
return std::string(begin, detail::write<char>(begin, value));
}
}
template <typename Char>
-typename buffer_context<Char>::iterator detail::vformat_to(
+void detail::vformat_to(
detail::buffer<Char>& buf, basic_string_view<Char> format_str,
- basic_format_args<buffer_context<type_identity_t<Char>>> args) {
- using af = arg_formatter<typename buffer_context<Char>::iterator, Char>;
- return vformat_to<af>(std::back_inserter(buf), to_string_view(format_str),
- args);
+ basic_format_args<buffer_context<type_identity_t<Char>>> args,
+ detail::locale_ref loc) {
+ using iterator = typename buffer_context<Char>::iterator;
+ auto out = buffer_appender<Char>(buf);  // output iterator over buf
+ if (format_str.size() == 2 && equal2(format_str.data(), "{}")) {  // shortcut for the exact format string "{}"
+ auto arg = args.get(0);
+ if (!arg) error_handler().on_error("argument not found");
+ visit_format_arg(default_arg_formatter<iterator, Char>{out, args, loc},  // format argument 0 with default formatting, skipping the parser
+ arg);
+ return;
+ }
+ format_handler<iterator, Char, buffer_context<Char>> h(out, format_str, args,
+ loc);
+ parse_format_string<false>(format_str, h);  // general path: parse the format string and drive the handler
}
#ifndef FMT_HEADER_ONLY
-extern template format_context::iterator detail::vformat_to(
- detail::buffer<char>&, string_view, basic_format_args<format_context>);
+extern template void detail::vformat_to(detail::buffer<char>&, string_view,
+ basic_format_args<format_context>,
+ detail::locale_ref);
namespace detail {
+
extern template FMT_API std::string grouping_impl<char>(locale_ref loc);
extern template FMT_API std::string grouping_impl<wchar_t>(locale_ref loc);
extern template FMT_API char thousands_sep_impl<char>(locale_ref loc);
template <typename S, typename Char = char_t<S>,
FMT_ENABLE_IF(detail::is_string<S>::value)>
-inline typename FMT_BUFFER_CONTEXT(Char)::iterator vformat_to(
+inline void vformat_to(
detail::buffer<Char>& buf, const S& format_str,
basic_format_args<FMT_BUFFER_CONTEXT(type_identity_t<Char>)> args) {
return detail::vformat_to(buf, to_string_view(format_str), args);
typename Char = enable_if_t<detail::is_string<S>::value, char_t<S>>>
inline typename buffer_context<Char>::iterator format_to(
basic_memory_buffer<Char, SIZE>& buf, const S& format_str, Args&&... args) {
- detail::check_format_string<Args...>(format_str);
- using context = buffer_context<Char>;
- return detail::vformat_to(buf, to_string_view(format_str),
- make_format_args<context>(args...));
+ const auto& vargs = fmt::make_args_checked<Args...>(format_str, args...);
+ detail::vformat_to(buf, to_string_view(format_str), vargs);
+ return detail::buffer_appender<Char>(buf);
}
template <typename OutputIt, typename Char = char>
template <typename OutputIt, typename Char = char>
using format_args_t = basic_format_args<format_context_t<OutputIt, Char>>;
-template <
- typename S, typename OutputIt, typename... Args,
- FMT_ENABLE_IF(detail::is_output_iterator<OutputIt>::value &&
- !detail::is_contiguous_back_insert_iterator<OutputIt>::value)>
-inline OutputIt vformat_to(
- OutputIt out, const S& format_str,
- format_args_t<type_identity_t<OutputIt>, char_t<S>> args) {
- using af = detail::arg_formatter<OutputIt, char_t<S>>;
- return vformat_to<af>(out, to_string_view(format_str), args);
-}
-
-/**
- \rst
- Formats arguments, writes the result to the output iterator ``out`` and returns
- the iterator past the end of the output range.
-
- **Example**::
-
- std::vector<char> out;
- fmt::format_to(std::back_inserter(out), "{}", 42);
- \endrst
- */
-template <typename OutputIt, typename S, typename... Args,
- FMT_ENABLE_IF(
- detail::is_output_iterator<OutputIt>::value &&
- !detail::is_contiguous_back_insert_iterator<OutputIt>::value &&
- detail::is_string<S>::value)>
-inline OutputIt format_to(OutputIt out, const S& format_str, Args&&... args) {
- detail::check_format_string<Args...>(format_str);
- using context = format_context_t<OutputIt, char_t<S>>;
- return vformat_to(out, to_string_view(format_str),
- make_format_args<context>(args...));
-}
-
-template <typename OutputIt> struct format_to_n_result {
- /** Iterator past the end of the output range. */
- OutputIt out;
- /** Total (not truncated) output size. */
- size_t size;
-};
-
template <typename OutputIt, typename Char = typename OutputIt::value_type>
-using format_to_n_context =
- format_context_t<detail::truncating_iterator<OutputIt>, Char>;
+using format_to_n_context FMT_DEPRECATED_ALIAS = buffer_context<Char>;
template <typename OutputIt, typename Char = typename OutputIt::value_type>
-using format_to_n_args = basic_format_args<format_to_n_context<OutputIt, Char>>;
+using format_to_n_args FMT_DEPRECATED_ALIAS =
+ basic_format_args<buffer_context<Char>>;
template <typename OutputIt, typename Char, typename... Args>
-inline format_arg_store<format_to_n_context<OutputIt, Char>, Args...>
+FMT_DEPRECATED format_arg_store<buffer_context<Char>, Args...>
make_format_to_n_args(const Args&... args) {
- return format_arg_store<format_to_n_context<OutputIt, Char>, Args...>(
- args...);
-}
-
-template <typename OutputIt, typename Char, typename... Args,
- FMT_ENABLE_IF(detail::is_output_iterator<OutputIt>::value)>
-inline format_to_n_result<OutputIt> vformat_to_n(
- OutputIt out, size_t n, basic_string_view<Char> format_str,
- format_to_n_args<type_identity_t<OutputIt>, type_identity_t<Char>> args) {
- auto it = vformat_to(detail::truncating_iterator<OutputIt>(out, n),
- format_str, args);
- return {it.base(), it.count()};
-}
-
-/**
- \rst
- Formats arguments, writes up to ``n`` characters of the result to the output
- iterator ``out`` and returns the total output size and the iterator past the
- end of the output range.
- \endrst
- */
-template <typename OutputIt, typename S, typename... Args,
- FMT_ENABLE_IF(detail::is_string<S>::value&&
- detail::is_output_iterator<OutputIt>::value)>
-inline format_to_n_result<OutputIt> format_to_n(OutputIt out, size_t n,
- const S& format_str,
- const Args&... args) {
- detail::check_format_string<Args...>(format_str);
- using context = format_to_n_context<OutputIt, char_t<S>>;
- return vformat_to_n(out, n, to_string_view(format_str),
- make_format_args<context>(args...));
+ return format_arg_store<buffer_context<Char>, Args...>(args...);
}
template <typename Char, enable_if_t<(!std::is_same<Char, char>::value), int>>
return to_string(buffer);
}
-/**
- Returns the number of characters in the output of
- ``format(format_str, args...)``.
- */
-template <typename... Args>
-inline size_t formatted_size(string_view format_str, const Args&... args) {
- return format_to(detail::counting_iterator(), format_str, args...).count();
-}
-
template <typename Char, FMT_ENABLE_IF(std::is_same<Char, wchar_t>::value)>
void vprint(std::FILE* f, basic_string_view<Char> format_str,
wformat_args args) {
template <typename... Args>
std::basic_string<Char> operator()(Args&&... args) const {
static FMT_CONSTEXPR_DECL Char s[] = {CHARS..., '\0'};
- check_format_string<remove_cvref_t<Args>...>(FMT_STRING(s));
- return format(s, std::forward<Args>(args)...);
+ return format(FMT_STRING(s), std::forward<Args>(args)...);
}
};
# else
return precision < 0 ? snprintf_ptr(buf, size, format, value)
: snprintf_ptr(buf, size, format, precision, value);
}
+
+template FMT_API dragonbox::decimal_fp<float> dragonbox::to_decimal(float x)
+ FMT_NOEXCEPT;
+template FMT_API dragonbox::decimal_fp<double> dragonbox::to_decimal(double x)
+ FMT_NOEXCEPT;
+
+// DEPRECATED! This function exists for ABI compatibility.
+// Formats `args` according to `format_str`, appending the output to `buf`
+// through a back-insert iterator, and returns that iterator. The handler is
+// given a value-initialized locale argument.
+template <typename Char>
+typename basic_format_context<std::back_insert_iterator<buffer<Char>>,
+                              Char>::iterator
+vformat_to(buffer<Char>& buf, basic_string_view<Char> format_str,
+           basic_format_args<basic_format_context<
+               std::back_insert_iterator<buffer<type_identity_t<Char>>>,
+               type_identity_t<Char>>>
+               args) {
+  // The iterator has to be parameterized on Char (not char) so that it can
+  // be built from `buf` and matches the declared return type for every
+  // instantiation of this template.
+  using iterator = std::back_insert_iterator<buffer<Char>>;
+  using context = basic_format_context<
+      std::back_insert_iterator<buffer<type_identity_t<Char>>>,
+      type_identity_t<Char>>;
+  auto out = iterator(buf);
+  format_handler<iterator, Char, context> h(out, format_str, args, {});
+  parse_format_string<false>(format_str, h);
+  return out;
+}
+template basic_format_context<std::back_insert_iterator<buffer<char>>,
+ char>::iterator
+vformat_to(buffer<char>&, string_view,
+ basic_format_args<basic_format_context<
+ std::back_insert_iterator<buffer<type_identity_t<char>>>,
+ type_identity_t<char>>>);
} // namespace detail
template struct FMT_INSTANTIATION_DEF_API detail::basic_data<void>;
template FMT_API void detail::buffer<char>::append(const char*, const char*);
-template FMT_API FMT_BUFFER_CONTEXT(char)::iterator detail::vformat_to(
+template FMT_API void detail::vformat_to(
detail::buffer<char>&, string_view,
- basic_format_args<FMT_BUFFER_CONTEXT(char)>);
+ basic_format_args<FMT_BUFFER_CONTEXT(char)>, detail::locale_ref);
template FMT_API int detail::snprintf_float(double, int, detail::float_specs,
detail::buffer<char>&);
#define NONSTD_OPTIONAL_LITE_HPP
#define optional_lite_MAJOR 3
-#define optional_lite_MINOR 2
+#define optional_lite_MINOR 4
#define optional_lite_PATCH 0
#define optional_lite_VERSION optional_STRINGIFY(optional_lite_MAJOR) "." optional_STRINGIFY(optional_lite_MINOR) "." optional_STRINGIFY(optional_lite_PATCH)
#define optional_OPTIONAL_NONSTD 1
#define optional_OPTIONAL_STD 2
+// tweak header support:
+
+#ifdef __has_include
+# if __has_include(<nonstd/optional.tweak.hpp>)
+# include <nonstd/optional.tweak.hpp>
+# endif
+#define optional_HAVE_TWEAK_HEADER 1
+#else
+#define optional_HAVE_TWEAK_HEADER 0
+//# pragma message("optional.hpp: Note: Tweak header not supported.")
+#endif
+
+// optional selection and configuration:
+
#if !defined( optional_CONFIG_SELECT_OPTIONAL )
# define optional_CONFIG_SELECT_OPTIONAL ( optional_HAVE_STD_OPTIONAL ? optional_OPTIONAL_STD : optional_OPTIONAL_NONSTD )
#endif
// Control presence of exception handling (try and auto discover):
#ifndef optional_CONFIG_NO_EXCEPTIONS
-# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)
+# if _MSC_VER
+# include <cstddef> // for _HAS_EXCEPTIONS
+# endif
+# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (_HAS_EXCEPTIONS)
# define optional_CONFIG_NO_EXCEPTIONS 0
# else
# define optional_CONFIG_NO_EXCEPTIONS 1
// Compiler versions:
//
-// MSVC++ 6.0 _MSC_VER == 1200 (Visual Studio 6.0)
-// MSVC++ 7.0 _MSC_VER == 1300 (Visual Studio .NET 2002)
-// MSVC++ 7.1 _MSC_VER == 1310 (Visual Studio .NET 2003)
-// MSVC++ 8.0 _MSC_VER == 1400 (Visual Studio 2005)
-// MSVC++ 9.0 _MSC_VER == 1500 (Visual Studio 2008)
-// MSVC++ 10.0 _MSC_VER == 1600 (Visual Studio 2010)
-// MSVC++ 11.0 _MSC_VER == 1700 (Visual Studio 2012)
-// MSVC++ 12.0 _MSC_VER == 1800 (Visual Studio 2013)
-// MSVC++ 14.0 _MSC_VER == 1900 (Visual Studio 2015)
-// MSVC++ 14.1 _MSC_VER >= 1910 (Visual Studio 2017)
+// MSVC++ 6.0 _MSC_VER == 1200 optional_COMPILER_MSVC_VERSION == 60 (Visual Studio 6.0)
+// MSVC++ 7.0 _MSC_VER == 1300 optional_COMPILER_MSVC_VERSION == 70 (Visual Studio .NET 2002)
+// MSVC++ 7.1 _MSC_VER == 1310 optional_COMPILER_MSVC_VERSION == 71 (Visual Studio .NET 2003)
+// MSVC++ 8.0 _MSC_VER == 1400 optional_COMPILER_MSVC_VERSION == 80 (Visual Studio 2005)
+// MSVC++ 9.0 _MSC_VER == 1500 optional_COMPILER_MSVC_VERSION == 90 (Visual Studio 2008)
+// MSVC++ 10.0 _MSC_VER == 1600 optional_COMPILER_MSVC_VERSION == 100 (Visual Studio 2010)
+// MSVC++ 11.0 _MSC_VER == 1700 optional_COMPILER_MSVC_VERSION == 110 (Visual Studio 2012)
+// MSVC++ 12.0 _MSC_VER == 1800 optional_COMPILER_MSVC_VERSION == 120 (Visual Studio 2013)
+// MSVC++ 14.0 _MSC_VER == 1900 optional_COMPILER_MSVC_VERSION == 140 (Visual Studio 2015)
+// MSVC++ 14.1 _MSC_VER >= 1910 optional_COMPILER_MSVC_VERSION == 141 (Visual Studio 2017)
+// MSVC++ 14.2 _MSC_VER >= 1920 optional_COMPILER_MSVC_VERSION == 142 (Visual Studio 2019)
#if defined(_MSC_VER ) && !defined(__clang__)
# define optional_COMPILER_MSVC_VER (_MSC_VER )
#define optional_CPP14_000 (optional_CPP14_OR_GREATER)
#define optional_CPP17_000 (optional_CPP17_OR_GREATER)
+// gcc >= 4.9, msvc >= vc14.1 (vs17):
+#define optional_CPP11_140_G490 ((optional_CPP11_OR_GREATER_ && optional_COMPILER_GNUC_VERSION >= 490) || (optional_COMPILER_MSVC_VER >= 1910))
+
+// clang >= 3.5, msvc >= vc11 (vs12):
+#define optional_CPP11_110_C350 ( optional_CPP11_110 && !optional_BETWEEN( optional_COMPILER_CLANG_VERSION, 1, 350 ) )
+
+// clang >= 3.5, gcc >= 5.0, msvc >= vc11 (vs12):
+#define optional_CPP11_110_C350_G500 \
+ ( optional_CPP11_110 && \
+ !( optional_BETWEEN( optional_COMPILER_CLANG_VERSION, 1, 350 ) \
+ || optional_BETWEEN( optional_COMPILER_GNUC_VERSION , 1, 500 ) ) )
+
// Presence of C++11 language features:
#define optional_HAVE_CONSTEXPR_11 optional_CPP11_140
#define optional_HAVE_IS_DEFAULT optional_CPP11_140
#define optional_HAVE_NOEXCEPT optional_CPP11_140
#define optional_HAVE_NULLPTR optional_CPP11_100
-#define optional_HAVE_REF_QUALIFIER optional_CPP11_140
+#define optional_HAVE_REF_QUALIFIER optional_CPP11_140_G490
+#define optional_HAVE_INITIALIZER_LIST optional_CPP11_140
// Presence of C++14 language features:
#define optional_HAVE_TR1_TYPE_TRAITS (!! optional_COMPILER_GNUC_VERSION )
#define optional_HAVE_TR1_ADD_POINTER (!! optional_COMPILER_GNUC_VERSION )
+#define optional_HAVE_IS_ASSIGNABLE optional_CPP11_110_C350
+#define optional_HAVE_IS_MOVE_CONSTRUCTIBLE optional_CPP11_110_C350
+#define optional_HAVE_IS_NOTHROW_MOVE_ASSIGNABLE optional_CPP11_110_C350
+#define optional_HAVE_IS_NOTHROW_MOVE_CONSTRUCTIBLE optional_CPP11_110_C350
+#define optional_HAVE_IS_TRIVIALLY_COPY_CONSTRUCTIBLE optional_CPP11_110_C350_G500
+#define optional_HAVE_IS_TRIVIALLY_MOVE_CONSTRUCTIBLE optional_CPP11_110_C350_G500
+
// C++ feature usage:
#if optional_HAVE( CONSTEXPR_11 )
template< bool B = (__VA_ARGS__), typename std::enable_if<B, int>::type = 0 >
#define optional_REQUIRES_T(...) \
- , typename = typename std::enable_if< (__VA_ARGS__), nonstd::optional_lite::detail::enabler >::type
+ , typename std::enable_if< (__VA_ARGS__), int >::type = 0
#define optional_REQUIRES_R(R, ...) \
typename std::enable_if< (__VA_ARGS__), R>::type
namespace std11 {
+template< class T, T v > struct integral_constant { enum { value = v }; };
+template< bool B > struct bool_constant : integral_constant<bool, B>{};
+
+typedef bool_constant< true > true_type;
+typedef bool_constant< false > false_type;
+
#if optional_CPP11_OR_GREATER
using std::move;
#else
template< typename T, typename F > struct conditional<false, T, F> { typedef F type; };
#endif // optional_HAVE_CONDITIONAL
+#if optional_HAVE( IS_ASSIGNABLE )
+ using std::is_assignable;
+#else
+ template< class T, class U > struct is_assignable : std11::true_type{};
+#endif
+
+#if optional_HAVE( IS_MOVE_CONSTRUCTIBLE )
+ using std::is_move_constructible;
+#else
+ template< class T > struct is_move_constructible : std11::true_type{};
+#endif
+
+#if optional_HAVE( IS_NOTHROW_MOVE_ASSIGNABLE )
+ using std::is_nothrow_move_assignable;
+#else
+ template< class T > struct is_nothrow_move_assignable : std11::true_type{};
+#endif
+
+#if optional_HAVE( IS_NOTHROW_MOVE_CONSTRUCTIBLE )
+ using std::is_nothrow_move_constructible;
+#else
+ template< class T > struct is_nothrow_move_constructible : std11::true_type{};
+#endif
+
+#if optional_HAVE( IS_TRIVIALLY_COPY_CONSTRUCTIBLE )
+ using std::is_trivially_copy_constructible;
+#else
+ template< class T > struct is_trivially_copy_constructible : std11::true_type{};
+#endif
+
+#if optional_HAVE( IS_TRIVIALLY_MOVE_CONSTRUCTIBLE )
+ using std::is_trivially_move_constructible;
+#else
+ template< class T > struct is_trivially_move_constructible : std11::true_type{};
+#endif
+
} // namespace std11
#if optional_CPP11_OR_GREATER
struct is_swappable
{
template< typename T, typename = decltype( swap( std::declval<T&>(), std::declval<T&>() ) ) >
- static std::true_type test( int /*unused*/ );
+ static std11::true_type test( int /*unused*/ );
template< typename >
- static std::false_type test(...);
+ static std11::false_type test(...);
};
struct is_nothrow_swappable
}
template< typename T >
- static auto test( int /*unused*/ ) -> std::integral_constant<bool, satisfies<T>()>{}
+ static auto test( int /*unused*/ ) -> std11::integral_constant<bool, satisfies<T>()>{}
template< typename >
- static auto test(...) -> std::false_type;
+ static auto test(...) -> std11::false_type;
};
} // namespace detail
namespace detail {
-// for optional_REQUIRES_T
-
-#if optional_CPP11_OR_GREATER
-enum class enabler{};
-#endif
-
// C++11 emulation:
struct nulltype{};
::new( value_ptr() ) value_type( std::move( v ) );
}
+ // In-place constructor: the unnamed tag parameter only selects this
+ // overload; the remaining arguments are perfectly forwarded to emplace().
+ template< class... Args >
+ storage_t( nonstd_lite_in_place_t(T), Args&&... args )
+ {
+ emplace( std::forward<Args>(args)... );
+ }
+
template< class... Args >
void emplace( Args&&... args )
{
return * value_ptr();
}
-#if optional_CPP11_OR_GREATER
+#if optional_HAVE( REF_QUALIFIER )
optional_nodiscard value_type const && value() const optional_refref_qual
{
{}
// 2 - copy-construct
- optional_constexpr14 optional( optional const & other
#if optional_CPP11_OR_GREATER
- optional_REQUIRES_A(
- true || std::is_copy_constructible<T>::value
- )
+ // template< typename U = T
+ // optional_REQUIRES_T(
+ // std::is_copy_constructible<U>::value
+ // || std11::is_trivially_copy_constructible<U>::value
+ // )
+ // >
#endif
- )
+ optional_constexpr14 optional( optional const & other )
: has_value_( other.has_value() )
{
if ( other.has_value() )
#if optional_CPP11_OR_GREATER
// 3 (C++11) - move-construct from optional
- optional_constexpr14 optional( optional && other
- optional_REQUIRES_A(
- true || std::is_move_constructible<T>::value
+ template< typename U = T
+ optional_REQUIRES_T(
+ std11::is_move_constructible<U>::value
+ || std11::is_trivially_move_constructible<U>::value
)
- // NOLINTNEXTLINE( performance-noexcept-move-constructor )
- ) noexcept( std::is_nothrow_move_constructible<T>::value )
+ >
+ optional_constexpr14 optional( optional && other )
+ // NOLINTNEXTLINE( performance-noexcept-move-constructor )
+ noexcept( std11::is_nothrow_move_constructible<T>::value )
: has_value_( other.has_value() )
{
if ( other.has_value() )
}
// 4a (C++11) - explicit converting copy-construct from optional
- template< typename U >
- explicit optional( optional<U> const & other
- optional_REQUIRES_A(
+ template< typename U
+ optional_REQUIRES_T(
std::is_constructible<T, U const &>::value
&& !std::is_constructible<T, optional<U> & >::value
&& !std::is_constructible<T, optional<U> && >::value
&& !std::is_convertible< optional<U> const &&, T>::value
&& !std::is_convertible< U const & , T>::value /*=> explicit */
)
- )
+ >
+ explicit optional( optional<U> const & other )
: has_value_( other.has_value() )
{
if ( other.has_value() )
#endif // optional_CPP11_OR_GREATER
// 4b (C++98 and later) - non-explicit converting copy-construct from optional
- template< typename U >
- // NOLINTNEXTLINE( google-explicit-constructor, hicpp-explicit-conversions )
- optional( optional<U> const & other
+ template< typename U
#if optional_CPP11_OR_GREATER
- optional_REQUIRES_A(
+ optional_REQUIRES_T(
std::is_constructible<T, U const &>::value
&& !std::is_constructible<T, optional<U> & >::value
&& !std::is_constructible<T, optional<U> && >::value
&& std::is_convertible< U const & , T>::value /*=> non-explicit */
)
#endif // optional_CPP11_OR_GREATER
- )
+ >
+ // NOLINTNEXTLINE( google-explicit-constructor, hicpp-explicit-conversions )
+ /*non-explicit*/ optional( optional<U> const & other )
: has_value_( other.has_value() )
{
if ( other.has_value() )
#if optional_CPP11_OR_GREATER
// 5a (C++11) - explicit converting move-construct from optional
- template< typename U >
- explicit optional( optional<U> && other
- optional_REQUIRES_A(
+ template< typename U
+ optional_REQUIRES_T(
std::is_constructible<T, U &&>::value
&& !std::is_constructible<T, optional<U> & >::value
&& !std::is_constructible<T, optional<U> && >::value
&& !std::is_convertible< optional<U> const &&, T>::value
&& !std::is_convertible< U &&, T>::value /*=> explicit */
)
+ >
+ explicit optional( optional<U> && other
)
: has_value_( other.has_value() )
{
}
// 5a (C++11) - non-explicit converting move-construct from optional
- template< typename U >
- // NOLINTNEXTLINE( google-explicit-constructor, hicpp-explicit-conversions )
- optional( optional<U> && other
- optional_REQUIRES_A(
+ template< typename U
+ optional_REQUIRES_T(
std::is_constructible<T, U &&>::value
&& !std::is_constructible<T, optional<U> & >::value
&& !std::is_constructible<T, optional<U> && >::value
&& !std::is_convertible< optional<U> const &&, T>::value
&& std::is_convertible< U &&, T>::value /*=> non-explicit */
)
- )
+ >
+ // NOLINTNEXTLINE( google-explicit-constructor, hicpp-explicit-conversions )
+ /*non-explicit*/ optional( optional<U> && other )
: has_value_( other.has_value() )
{
if ( other.has_value() )
{}
// 8a (C++11) - explicit move construct from value
- template< typename U = value_type >
- optional_constexpr explicit optional( U && value
- optional_REQUIRES_A(
+ template< typename U = T
+ optional_REQUIRES_T(
std::is_constructible<T, U&&>::value
&& !std::is_same<typename std20::remove_cvref<U>::type, nonstd_lite_in_place_t(U)>::value
&& !std::is_same<typename std20::remove_cvref<U>::type, optional<T>>::value
&& !std::is_convertible<U&&, T>::value /*=> explicit */
)
- )
+ >
+ optional_constexpr explicit optional( U && value )
: has_value_( true )
- , contained( T{ std::forward<U>( value ) } )
+ , contained( nonstd_lite_in_place(T), std::forward<U>( value ) )
{}
// 8b (C++11) - non-explicit move construct from value
- template< typename U = value_type >
- // NOLINTNEXTLINE( google-explicit-constructor, hicpp-explicit-conversions )
- optional_constexpr optional( U && value
- optional_REQUIRES_A(
+ template< typename U = T
+ optional_REQUIRES_T(
std::is_constructible<T, U&&>::value
&& !std::is_same<typename std20::remove_cvref<U>::type, nonstd_lite_in_place_t(U)>::value
&& !std::is_same<typename std20::remove_cvref<U>::type, optional<T>>::value
&& std::is_convertible<U&&, T>::value /*=> non-explicit */
)
- )
+ >
+ // NOLINTNEXTLINE( google-explicit-constructor, hicpp-explicit-conversions )
+ optional_constexpr /*non-explicit*/ optional( U && value )
: has_value_( true )
- , contained( std::forward<U>( value ) )
+ , contained( nonstd_lite_in_place(T), std::forward<U>( value ) )
{}
#else // optional_CPP11_OR_GREATER
)
operator=( optional const & other )
noexcept(
- std::is_nothrow_move_assignable<T>::value
- && std::is_nothrow_move_constructible<T>::value
+ std11::is_nothrow_move_assignable<T>::value
+ && std11::is_nothrow_move_constructible<T>::value
)
#else
optional & operator=( optional const & other )
optional_REQUIRES_R(
optional &,
true
-// std::is_move_constructible<T>::value
+// std11::is_move_constructible<T>::value
// && std::is_move_assignable<T>::value
)
operator=( optional && other ) noexcept
optional_REQUIRES_R(
optional &,
std::is_constructible<T , U>::value
- && std::is_assignable<T&, U>::value
+ && std11::is_assignable<T&, U>::value
&& !std::is_same<typename std20::remove_cvref<U>::type, nonstd_lite_in_place_t(U)>::value
&& !std::is_same<typename std20::remove_cvref<U>::type, optional<T>>::value
&& !(std::is_scalar<T>::value && std::is_same<T, typename std::decay<U>::type>::value)
optional_REQUIRES_R(
optional&,
std::is_constructible< T , U const &>::value
- && std::is_assignable< T&, U const &>::value
+ && std11::is_assignable< T&, U const &>::value
&& !std::is_constructible<T, optional<U> & >::value
&& !std::is_constructible<T, optional<U> && >::value
&& !std::is_constructible<T, optional<U> const & >::value
&& !std::is_convertible< optional<U> && , T>::value
&& !std::is_convertible< optional<U> const & , T>::value
&& !std::is_convertible< optional<U> const &&, T>::value
- && !std::is_assignable< T&, optional<U> & >::value
- && !std::is_assignable< T&, optional<U> && >::value
- && !std::is_assignable< T&, optional<U> const & >::value
- && !std::is_assignable< T&, optional<U> const && >::value
+ && !std11::is_assignable< T&, optional<U> & >::value
+ && !std11::is_assignable< T&, optional<U> && >::value
+ && !std11::is_assignable< T&, optional<U> const & >::value
+ && !std11::is_assignable< T&, optional<U> const && >::value
)
#else
optional&
optional_REQUIRES_R(
optional&,
std::is_constructible< T , U>::value
- && std::is_assignable< T&, U>::value
+ && std11::is_assignable< T&, U>::value
&& !std::is_constructible<T, optional<U> & >::value
&& !std::is_constructible<T, optional<U> && >::value
&& !std::is_constructible<T, optional<U> const & >::value
&& !std::is_convertible< optional<U> && , T>::value
&& !std::is_convertible< optional<U> const & , T>::value
&& !std::is_convertible< optional<U> const &&, T>::value
- && !std::is_assignable< T&, optional<U> & >::value
- && !std::is_assignable< T&, optional<U> && >::value
- && !std::is_assignable< T&, optional<U> const & >::value
- && !std::is_assignable< T&, optional<U> const && >::value
+ && !std11::is_assignable< T&, optional<U> & >::value
+ && !std11::is_assignable< T&, optional<U> && >::value
+ && !std11::is_assignable< T&, optional<U> const & >::value
+ && !std11::is_assignable< T&, optional<U> const && >::value
)
operator=( optional<U> && other )
{
void swap( optional & other )
#if optional_CPP11_OR_GREATER
noexcept(
- std::is_nothrow_move_constructible<T>::value
+ std11::is_nothrow_move_constructible<T>::value
&& std17::is_nothrow_swappable<T>::value
)
#endif
contained.value();
}
-#if optional_HAVE( REF_QUALIFIER ) && ( !optional_COMPILER_GNUC_VERSION || optional_COMPILER_GNUC_VERSION >= 490 )
+#if optional_HAVE( REF_QUALIFIER )
optional_constexpr value_type const && operator *() const optional_refref_qual
{
template< typename T
#if optional_CPP11_OR_GREATER
optional_REQUIRES_T(
- std::is_move_constructible<T>::value
+ std11::is_move_constructible<T>::value
&& std17::is_swappable<T>::value )
#endif
>
using optional_lite::optional;
using optional_lite::nullopt_t;
using optional_lite::nullopt;
+
+#if ! optional_CONFIG_NO_EXCEPTIONS
using optional_lite::bad_optional_access;
+#endif
using optional_lite::make_optional;
--- /dev/null
+/* $OpenBSD: mktemp.c,v 1.39 2017/11/28 06:55:49 tb Exp $ */
+/*
+ * Copyright (c) 1996-1998, 2008 Theo de Raadt
+ * Copyright (c) 1997, 2008-2009 Todd C. Miller
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifdef _WIN32
+#ifndef _WIN32_WINNT
+#define _WIN32_WINNT 0x0600 // _WIN32_WINNT_VISTA
+#endif
+
+#ifndef _CRT_NONSTDC_NO_DEPRECATE
+#define _CRT_NONSTDC_NO_DEPRECATE
+#endif
+
+#ifndef _CRT_SECURE_NO_WARNINGS
+#define _CRT_SECURE_NO_WARNINGS
+#endif
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _WIN32
+#include <direct.h>
+#include <io.h>
+
+#define WIN32_LEAN_AND_MEAN
+#define NOMINMAX 1
+#define WIN32_NO_STATUS
+#include <windows.h>
+#undef WIN32_NO_STATUS
+#include <ntstatus.h>
+
+// Work-around wrong calling convention for RtlGenRandom in old mingw-w64
+#define SystemFunction036 __stdcall SystemFunction036
+#include <ntsecapi.h>
+#undef SystemFunction036
+#endif
+
+#ifdef _MSC_VER
+#define S_IRUSR (_S_IREAD)
+#define S_IWUSR (_S_IWRITE)
+#endif
+
+#define MKTEMP_NAME 0
+#define MKTEMP_FILE 1
+#define MKTEMP_DIR 2
+
+#define TEMPCHARS "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
+#define NUM_CHARS (sizeof(TEMPCHARS) - 1)
+#define MIN_X 6
+
+#ifdef _WIN32
+#define MKOTEMP_FLAGS (_O_APPEND|_O_NOINHERIT|_O_BINARY|_O_TEXT| \
+ _O_U16TEXT|_O_U8TEXT|_O_WTEXT)
+#define MKTEMP_FLAGS_DEFAULT (_O_BINARY)
+#else
+#define MKOTEMP_FLAGS (O_APPEND|O_CLOEXEC|O_DSYNC|O_RSYNC|O_SYNC)
+#define MKTEMP_FLAGS_DEFAULT (0)
+#endif
+
+#ifndef nitems
+#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0]))
+#endif
+
+#ifdef _WIN32
+/*
+ * One-time-initialization callback: looks up the export whose name is passed
+ * in `parameter` in ntdll.dll and stores its address in *context.
+ * Always returns TRUE.
+ */
+static BOOL CALLBACK
+lookup_ntdll_function_once(
+    PINIT_ONCE init_once, PVOID parameter, PVOID *context)
+{
+	HMODULE ntdll = GetModuleHandleA("ntdll.dll");
+
+	(void)init_once;
+	*context = (PVOID)GetProcAddress(ntdll, parameter);
+	return(TRUE);
+}
+
+/*
+ * Returns the value reported by ntdll's RtlGetLastNtStatus. The function
+ * pointer is resolved through InitOnceExecuteOnce with
+ * lookup_ntdll_function_once as the callback. If the export cannot be
+ * resolved, STATUS_SUCCESS is returned instead of calling through a NULL
+ * pointer, so callers simply see no status worth remapping.
+ */
+static NTSTATUS
+GetLastNtStatus(void)
+{
+	typedef NTSTATUS(NTAPI * RtlGetLastNtStatus_t)(void);
+	static INIT_ONCE init_once = INIT_ONCE_STATIC_INIT;
+	RtlGetLastNtStatus_t get_last_nt_status = NULL;
+
+	InitOnceExecuteOnce(&init_once, lookup_ntdll_function_once,
+	    "RtlGetLastNtStatus", (LPVOID *)&get_last_nt_status);
+	/* GetProcAddress may have failed; never call a NULL pointer. */
+	if (get_last_nt_status == NULL)
+		return(STATUS_SUCCESS);
+	return(get_last_nt_status());
+}
+
+static int
+normalize_msvcrt_errno(int ret)
+{
+ if (ret == -1 && errno == EACCES && _doserrno == ERROR_ACCESS_DENIED) {
+ /*
+ * Win32 APIs return ERROR_ACCESS_DENIED for many distinct
+ * NTSTATUS codes, even when it's arguably inappropriate to do
+ * so, e.g. if you attempt to open a directory, or open a file
+ * that's in the "pending delete" state. These are mapped to
+ * EACCES in the C runtime. We instead map these two cases to
+ * EEXIST. Only errno is rewritten; the return value passed in
+ * by the caller is always handed back unchanged.
+ */
+ NTSTATUS nt_err = GetLastNtStatus();
+ if (nt_err == STATUS_FILE_IS_A_DIRECTORY ||
+ nt_err == STATUS_DELETE_PENDING) {
+ errno = EEXIST;
+ }
+ }
+ return(ret);
+}
+
+#define open(...) (normalize_msvcrt_errno(open(__VA_ARGS__)))
+#define mkdir(path, mode) (normalize_msvcrt_errno(mkdir(path)))
+#define lstat(path, sb) (normalize_msvcrt_errno(stat(path, sb)))
+
+static void (*_bsd_mkstemp_random_source)(void *buf, size_t n);
+
+/*
+ * Installs `f` as the function arc4random_buf() calls to fill its buffer.
+ * While the stored pointer is NULL, arc4random_buf() uses RtlGenRandom.
+ */
+void
+bsd_mkstemp_set_random_source(void (*f)(void *buf, size_t n))
+{
+ _bsd_mkstemp_random_source = f;
+}
+
+/*
+ * Fills `buf` with `nbytes` bytes: from the injected random source when one
+ * has been set, and from RtlGenRandom otherwise.
+ */
+static void
+arc4random_buf(void *buf, size_t nbytes)
+{
+	if (_bsd_mkstemp_random_source == NULL) {
+		RtlGenRandom(buf, (ULONG)nbytes);
+		return;
+	}
+	_bsd_mkstemp_random_source(buf, nbytes);
+}
+#endif
+
+/*
+ * Replaces the run of 'X' characters in `path` that ends `slen` bytes before
+ * the terminating NUL with random characters from TEMPCHARS, then acts on
+ * the resulting name according to `mode`:
+ *   MKTEMP_NAME - returns 0 once lstat() fails with ENOENT, -1 if lstat()
+ *                 fails for any other reason;
+ *   MKTEMP_FILE - open()s the name with O_CREAT|O_EXCL|O_RDWR plus `flags`
+ *                 and returns the descriptor, or -1 on a non-EEXIST error;
+ *   MKTEMP_DIR  - mkdir()s the name and returns 0, or -1 on a non-EEXIST
+ *                 error.
+ * A name that already exists is regenerated and retried, up to INT_MAX
+ * times; after that the function returns -1 with errno set to EEXIST.
+ * Returns -1 with errno set to EINVAL when the template is too short, has
+ * fewer than MIN_X 'X' characters before the suffix, or `flags` contains
+ * bits outside MKOTEMP_FLAGS.
+ */
+static int
+mktemp_internal(char *path, int slen, int mode, int flags)
+{
+ char *start, *cp, *ep;
+ const char tempchars[] = TEMPCHARS;
+ unsigned int tries;
+ struct stat sb;
+ size_t len;
+ int fd;
+
+ len = strlen(path);
+ if (len < MIN_X || slen < 0 || (size_t)slen > len - MIN_X) {
+ errno = EINVAL;
+ return(-1);
+ }
+ /* ep points just past the last replaceable character (start of suffix). */
+ ep = path + len - slen;
+
+ /* Walk backwards over the 'X' run to find where it begins. */
+ for (start = ep; start > path && start[-1] == 'X'; start--)
+ ;
+ if (ep - start < MIN_X) {
+ errno = EINVAL;
+ return(-1);
+ }
+
+ if (flags & ~MKOTEMP_FLAGS) {
+ errno = EINVAL;
+ return(-1);
+ }
+ flags |= O_CREAT|O_EXCL|O_RDWR;
+
+ tries = INT_MAX;
+ do {
+ /* Regenerate the whole [start, ep) range on every attempt. */
+ cp = start;
+ do {
+ unsigned short rbuf[16];
+ unsigned int i;
+
+ /*
+ * Avoid lots of arc4random() calls by using
+ * a buffer sized for up to 16 Xs at a time.
+ */
+ arc4random_buf(rbuf, sizeof(rbuf));
+ for (i = 0; i < nitems(rbuf) && cp != ep; i++)
+ *cp++ = tempchars[rbuf[i] % NUM_CHARS];
+ } while (cp != ep);
+
+ switch (mode) {
+ case MKTEMP_NAME:
+ /* lstat() succeeding means the name is taken: try again. */
+ if (lstat(path, &sb) != 0)
+ return(errno == ENOENT ? 0 : -1);
+ break;
+ case MKTEMP_FILE:
+ fd = open(path, flags, S_IRUSR|S_IWUSR);
+ /* Only an EEXIST failure falls through to a retry. */
+ if (fd != -1 || errno != EEXIST)
+ return(fd);
+ break;
+ case MKTEMP_DIR:
+ if (mkdir(path, S_IRUSR|S_IWUSR|S_IXUSR) == 0)
+ return(0);
+ if (errno != EEXIST)
+ return(-1);
+ break;
+ }
+ } while (--tries);
+
+ /* Every attempt collided with an existing name. */
+ errno = EEXIST;
+ return(-1);
+}
+
+/*
+ * Fills in the template `path` via mktemp_internal() in MKTEMP_NAME mode
+ * (nothing is created). Returns `path`, or NULL when the helper returns -1.
+ */
+char *
+bsd_mktemp(char *path)
+{
+	int rc = mktemp_internal(path, 0, MKTEMP_NAME, MKTEMP_FLAGS_DEFAULT);
+
+	return(rc == -1 ? NULL : path);
+}
+
+/*
+ * Creates a file from the template `path`, which has a `slen`-byte suffix
+ * after the 'X' run, passing the caller's extra open flags through.
+ * Returns whatever mktemp_internal() returns in MKTEMP_FILE mode.
+ */
+int
+bsd_mkostemps(char *path, int slen, int flags)
+{
+	int fd = mktemp_internal(path, slen, MKTEMP_FILE, flags);
+
+	return(fd);
+}
+
+/*
+ * Creates a file from the template `path` (no suffix) using the default
+ * open flags. Returns whatever mktemp_internal() returns in MKTEMP_FILE
+ * mode.
+ */
+int
+bsd_mkstemp(char *path)
+{
+	int fd = mktemp_internal(path, 0, MKTEMP_FILE, MKTEMP_FLAGS_DEFAULT);
+
+	return(fd);
+}
+
+/*
+ * Creates a file from the template `path` (no suffix), passing the caller's
+ * extra open flags through. Returns whatever mktemp_internal() returns in
+ * MKTEMP_FILE mode.
+ */
+int
+bsd_mkostemp(char *path, int flags)
+{
+	int fd = mktemp_internal(path, 0, MKTEMP_FILE, flags);
+
+	return(fd);
+}
+
+/*
+ * Creates a file from the template `path`, which has a `slen`-byte suffix
+ * after the 'X' run, using the default open flags. Returns whatever
+ * mktemp_internal() returns in MKTEMP_FILE mode.
+ */
+int
+bsd_mkstemps(char *path, int slen)
+{
+	int fd = mktemp_internal(path, slen, MKTEMP_FILE, MKTEMP_FLAGS_DEFAULT);
+
+	return(fd);
+}
+
+/*
+ * Creates a directory from the template `path` via mktemp_internal() in
+ * MKTEMP_DIR mode. Returns `path` when the helper returns 0, NULL otherwise.
+ */
+char *
+bsd_mkdtemp(char *path)
+{
+	if (mktemp_internal(path, 0, MKTEMP_DIR, 0) != 0)
+		return(NULL);
+	return(path);
+}
--- /dev/null
+#ifndef CCACHE_THIRD_PARTY_WIN32_MKTEMP_H_
+#define CCACHE_THIRD_PARTY_WIN32_MKTEMP_H_
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int bsd_mkstemp(char *);
+
+// Exposed for testing.
+void bsd_mkstemp_set_random_source(void (*)(void *buf, size_t n));
+
+#ifdef __cplusplus
+}
+#endif
+#endif
${clean_files_prop_name} "${CMAKE_BINARY_DIR}/testdir")
addtest(base)
-addtest(nocpp2)
-addtest(cpp1)
-addtest(multi_arch)
-addtest(serialize_diagnostics)
+addtest(basedir)
+addtest(cache_levels)
+addtest(cleanup)
addtest(color_diagnostics)
-addtest(sanitize_blacklist)
+addtest(cpp1)
addtest(debug_prefix_map)
-addtest(profiling)
-addtest(profiling_gcc)
-addtest(profiling_clang)
-addtest(profiling_hip_clang)
-addtest(split_dwarf)
-addtest(masquerading)
-addtest(hardlink)
-addtest(fileclone)
+addtest(depend)
addtest(direct)
addtest(direct_gcc)
-addtest(depend)
-addtest(basedir)
-addtest(no_compression)
-addtest(readonly)
-addtest(readonly_direct)
-addtest(cache_levels)
-addtest(cleanup)
-addtest(pch)
-addtest(modules)
-addtest(upgrade)
+addtest(fileclone)
+addtest(hardlink)
+addtest(inode_cache)
addtest(input_charset)
+addtest(masquerading)
+addtest(modules)
+addtest(multi_arch)
+addtest(no_compression)
+addtest(nocpp2)
addtest(nvcc)
addtest(nvcc_direct)
addtest(nvcc_ldir)
addtest(nvcc_nocpp2)
-addtest(inode_cache)
+addtest(pch)
+addtest(profiling)
+addtest(profiling_clang)
+addtest(profiling_gcc)
+addtest(profiling_hip_clang)
+addtest(readonly)
+addtest(readonly_direct)
+addtest(sanitize_blacklist)
+addtest(serialize_diagnostics)
+addtest(source_date_epoch)
+addtest(split_dwarf)
+addtest(upgrade)
fi
green() {
- printf "$ansi_boldgreen$*$ansi_reset\n"
+ printf "$ansi_boldgreen%s$ansi_reset\n" "$*"
}
red() {
- printf "$ansi_boldred$*$ansi_reset\n"
+ printf "$ansi_boldred%s$ansi_reset\n" "$*"
}
bold() {
- printf "$ansi_bold$*$ansi_reset\n"
+ printf "$ansi_bold%s$ansi_reset\n" "$*"
}
test_failed() {
generate_code() {
local nlines=$1
local outfile=$2
+ local i
rm -f $outfile
- for i in $(seq $nlines); do
+ for ((i = 1; i <= nlines; i++)); do
echo "int foo_$i(int x) { return x; }" >>$outfile
done
}
else
m=0
fi
- touch -t 1999010100$(printf "%02u" $m) "$@"
+ touch -t $((199901010000 + m)) "$@"
}
file_size() {
expect_stat() {
local stat="$1"
local expected_value="$2"
- local value="$(echo $($CCACHE -s | fgrep "$stat" | cut -c33-))"
+ local line
+ local value=""
+
+ while IFS= read -r line; do
+ if [[ $line = *"$stat"* ]]; then
+ value="${line:32}"
+ # remove leading & trailing whitespace
+ value="${value#${value%%[![:space:]]*}}"
+ value="${value%${value##*[![:space:]]}}"
+ break
+ fi
+ done < <($CCACHE -s)
if [ "$expected_value" != "$value" ]; then
test_failed "Expected \"$stat\" to be $expected_value, actual $value"
elfdump -a -w "$2".dump "$2"
# these were the elfdump fields that seemed to differ (empirically)
diff -I e_shoff -I sh_size -I st_name "$1".dump "$2".dump > /dev/null
+ elif $HOST_OS_WINDOWS && command -v dumpbin.exe >/dev/null; then
+ # Filter out fields that are affected by compilation time or source
+ # filename.
+ local awk_filter='
+ skip {--skip; next}
+
+ /Dump of file/ {next} # dumpbin header
+ /time date stamp/ {next} # incremental linker timestamp
+ /number of symbols/ {next} # symbol count
+ /Filename *\| \.file$/ {skip=1; next} # .file symbol
+
+ {print}
+ '
+ dumpbin.exe -all -nologo "$1" | awk "$awk_filter" > "$1".dump
+ dumpbin.exe -all -nologo "$2" | awk "$awk_filter" > "$2".dump
+ cmp -s "$1".dump "$2".dump
else
cmp -s "$1" "$2"
fi
}
reset_environment() {
- while read name; do
- unset $name
- done <<EOF
-$(env | sed -n 's/^\(CCACHE_[A-Z0-9_]*\)=.*$/\1/p')
-EOF
+ while IFS= read -r name; do
+ if [[ $name =~ ^CCACHE_[A-Z0-9_]*$ ]]; then
+ unset $name
+ fi
+ done < <(compgen -e)
+
unset GCC_COLORS
unset TERM
unset XDG_CACHE_HOME
;;
*clang*)
COMPILER_TYPE_CLANG=true
- CLANG_VERSION_SUFFIX=$(echo $COMPILER | sed 's/.*clang//')
+ CLANG_VERSION_SUFFIX=$(echo "${COMPILER%% *}" | sed 's/.*clang//')
;;
*)
echo "WARNING: Compiler $COMPILER not supported (version: $compiler_version) -- not running tests" >&2
PATH_DELIM=":"
fi
+if [[ $OSTYPE = msys* ]]; then
+ # Native symlink support for Windows.
+ export MSYS="${MSYS:-} winsymlinks:nativestrict"
+fi
+
if $HOST_OS_APPLE; then
SDKROOT=$(xcrun --sdk macosx --show-sdk-path 2>/dev/null)
if [ "$SDKROOT" = "" ]; then
echo "Error: xcrun --show-sdk-path failure"
exit 1
fi
+ export SDKROOT
SYSROOT="-isysroot `echo \"$SDKROOT\" | sed 's/ /\\ /g'`"
else
# ---------------------------------------
-all_suites="$(sed -rn 's/^addtest\((.*)\)$/\1/p' $(dirname $0)/CMakeLists.txt)"
+all_suites="$(sed -En 's/^addtest\((.*)\)$/\1/p' $(dirname $0)/CMakeLists.txt)"
for suite in $all_suites; do
. $(dirname $0)/suites/$suite.bash
# The exact output is not tested, but at least it's something human readable
# and not random memory.
- if [ $($CCACHE --version | grep -c '^ccache version [a-zA-Z0-9_./+-]*$') -ne 1 ]; then
+ local version_pattern=$'^ccache version [a-zA-Z0-9_./+-]*\r?$'
+ if [ $($CCACHE --version | grep -E -c "$version_pattern") -ne 1 ]; then
test_failed "Unexpected output of --version"
fi
rm -rf src
# -------------------------------------------------------------------------
+if ! $HOST_OS_WINDOWS; then
TEST "Source file ending with dot"
mkdir src
rm foo.o
rm -rf src
+fi
# -------------------------------------------------------------------------
TEST "Multiple file extensions"
expect_stat 'files in cache' 1
expect_equal_object_files reference_test1.o test1.o
- CCACHE_COMPILER=$COMPILER $CCACHE non_existing_compiler_will_be_overridden_anyway -c test1.c
+ CCACHE_COMPILER=$COMPILER_BIN $CCACHE \
+ non_existing_compiler_will_be_overridden_anyway \
+ $COMPILER_ARGS -c test1.c
expect_stat 'cache hit (preprocessed)' 1
expect_stat 'cache miss' 1
expect_stat 'files in cache' 1
expect_equal_object_files reference_test1.o test1.o
- CCACHE_COMPILER=$COMPILER $CCACHE same/for/relative -c test1.c
+ CCACHE_COMPILER=$COMPILER_BIN $CCACHE same/for/relative \
+ $COMPILER_ARGS -c test1.c
expect_stat 'cache hit (preprocessed)' 2
expect_stat 'cache miss' 1
expect_stat 'files in cache' 1
expect_equal_object_files reference_test1.o test1.o
- CCACHE_COMPILER=$COMPILER $CCACHE /and/even/absolute/compilers -c test1.c
+ CCACHE_COMPILER=$COMPILER_BIN $CCACHE /and/even/absolute/compilers \
+ $COMPILER_ARGS -c test1.c
expect_stat 'cache hit (preprocessed)' 3
expect_stat 'cache miss' 1
expect_stat 'files in cache' 1
chmod +x gcc
CCACHE_DEBUG=1 $CCACHE ./gcc -c test1.c
- compiler_type=$(sed -rn 's/.*Compiler type: (.*)/\1/p' test1.o.ccache-log)
+ compiler_type=$(sed -En 's/.*Compiler type: (.*)/\1/p' test1.o.ccache-log)
if [ "$compiler_type" != gcc ]; then
test_failed "Compiler type $compiler_type != gcc"
fi
rm test1.o.ccache-log
CCACHE_COMPILERTYPE=clang CCACHE_DEBUG=1 $CCACHE ./gcc -c test1.c
- compiler_type=$(sed -rn 's/.*Compiler type: (.*)/\1/p' test1.o.ccache-log)
+ compiler_type=$(sed -En 's/.*Compiler type: (.*)/\1/p' test1.o.ccache-log)
if [ "$compiler_type" != clang ]; then
test_failed "Compiler type $compiler_type != clang"
fi
# -------------------------------------------------------------------------
+if ! $HOST_OS_WINDOWS; then
TEST "CCACHE_UMASK"
saved_umask=$(umask)
expect_perm "$stats_file" -rw-rw-r--
umask $saved_umask
+fi
# -------------------------------------------------------------------------
TEST "No object file due to bad prefix"
CCACHE_PREFIX=`pwd`/empty-object-prefix $CCACHE_COMPILE -c test_empty_obj.c
expect_stat 'compiler produced empty output' 1
+ # -------------------------------------------------------------------------
+ TEST "Output to /dev/null"
+
+ $CCACHE_COMPILE -c test1.c
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ $CCACHE_COMPILE -c test1.c -o /dev/null
+ expect_stat 'cache hit (preprocessed)' 1
+ expect_stat 'cache miss' 1
+
# -------------------------------------------------------------------------
TEST "Caching stderr"
fi
# -------------------------------------------------------------------------
+if ! $HOST_OS_WINDOWS; then
TEST "UNCACHED_ERR_FD"
cat >compiler.sh <<'EOF'
if [ "$stderr" != "2Pu1Cc" ]; then
test_failed "Unexpected stderr: $stderr != 2Pu1Cc"
fi
+fi
# -------------------------------------------------------------------------
TEST "Invalid boolean environment configuration options"
local expected_level="$2"
slashes=$(find $CCACHE_DIR -name "*$type" \
- | sed -r -e 's!.*\.ccache/!!' -e 's![^/]*$!!' -e 's![^/]!!g')
+ | sed -E -e 's!.*\.ccache/!!' -e 's![^/]*$!!' -e 's![^/]!!g')
actual_level=$(echo -n "$slashes" | wc -c)
if [ "$actual_level" -ne "$expected_level" ]; then
test_failed "$type file on level $actual_level, expected level $expected_level"
prepare_cleanup_test_dir() {
local dir=$1
+ local i
rm -rf $dir
mkdir -p $dir
- for i in $(seq 0 9); do
- printf '%4017s' '' | tr ' ' 'A' >$dir/result${i}R
+ for ((i = 0; i < 10; ++i)); do
+ printf 'A%.0s' {1..4017} >$dir/result${i}R
backdate $((3 * i + 1)) $dir/result${i}R
done
# NUMFILES: 10, TOTALSIZE: 13 KiB, MAXFILES: 0, MAXSIZE: 0
expect_stat 'cache miss' 1
expect_stat 'cache hit (preprocessed)' 1
- # -------------------------------------------------------------------------
if $COMPILER_TYPE_GCC; then
+ # ---------------------------------------------------------------------
TEST "-fcolor-diagnostics not accepted for GCC"
generate_code 1 test.c
+
+ if $CCACHE_COMPILE -fcolor-diagnostics -c test.c >&/dev/null; then
+ test_failed "-fcolor-diagnostics unexpectedly accepted by GCC"
+ fi
+
+ # ---------------------------------------------------------------------
+ TEST "-fcolor-diagnostics not accepted for GCC for cached result"
+
+ generate_code 1 test.c
+
+ if ! $CCACHE_COMPILE -c test.c >&/dev/null; then
+ test_failed "unknown error compiling"
+ fi
+
if $CCACHE_COMPILE -fcolor-diagnostics -c test.c >&/dev/null; then
test_failed "-fcolor-diagnostics unexpectedly accepted by GCC"
fi
fi
while read -r case; do
+ # ---------------------------------------------------------------------
TEST "Cache object shared across ${case} (run_second_cpp=$run_second_cpp)"
color_diagnostics_generate_code test1.c
SUITE_inode_cache_PROBE() {
+ if $HOST_OS_WINDOWS; then
+ echo "inode cache not available on Windows"
+ return
+ fi
+
temp_dir=$(dirname $($CCACHE -k temporary_dir))
fs=$(stat -fLc %T $temp_dir)
if [ "$fs" = "nfs" ]; then
expect_stat 'files in cache' 3
$cuobjdump test_cuda.o > test1.dump
expect_equal_content reference_test3.dump test1.dump
-
+
# -------------------------------------------------------------------------
TEST "Option -dc"
-
+
$REAL_NVCC $nvcc_opts_cuda -dc -o reference_test4.o test_cuda.cu
$cuobjdump reference_test4.o > reference_test4.dump
expect_stat 'cache hit (direct)' 2
expect_stat 'cache hit (preprocessed)' 0
expect_stat 'cache miss' 1
+
+ # -------------------------------------------------------------------------
+ TEST "Too new PCH file"
+
+ # If the precompiled header is too new we shouldn't cache the result at all
+ # since:
+ #
+ # - the precompiled header content must be included in the hash, but
+ # - we don't trust the precompiled header content so we can't hash it
+ # ourselves, and
+ # - the preprocessed output doesn't contain the preprocessed header content.
+
+ touch lib.h
+ touch main.c
+
+ $REAL_COMPILER $SYSROOT -c lib.h
+ touch -d "@$(($(date +%s) + 60))" lib.h.gch # 1 minute in the future
+
+ CCACHE_SLOPPINESS="$DEFAULT_SLOPPINESS pch_defines,time_macros" $CCACHE_COMPILE -include lib.h -c main.c
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 0
+ expect_stat "can't use precompiled header" 1
}
pch_suite_clang() {
--- /dev/null
+# Probe for the source_date_epoch suite: prints a reason on stdout when the
+# compiler's preprocessor does not expand __DATE__ to a 1970 date under
+# SOURCE_DATE_EPOCH=0, and prints nothing otherwise.
+SUITE_source_date_epoch_PROBE() {
+    echo 'char x[] = __DATE__;' >test.c
+    SOURCE_DATE_EPOCH=0 $REAL_COMPILER -E test.c | grep -q 1970 \
+        || echo "SOURCE_DATE_EPOCH not supported by compiler"
+}
+
+# Write the three one-line C sources used by the source_date_epoch tests: one
+# without any temporal macro, one using __DATE__ and one using __TIME__.
+SUITE_source_date_epoch_SETUP() {
+    printf '%s\n' 'char x;' >without_temporal_macros.c
+    printf 'char x[] = %s;\n' '__DATE__' >with_date_macro.c
+    printf 'char x[] = %s;\n' '__TIME__' >with_time_macro.c
+}
+
+# Tests how SOURCE_DATE_EPOCH interacts with the cache for sources with and
+# without __DATE__/__TIME__. Each test compiles the same file three (or four)
+# times and checks the cumulative hit/miss counters after every compilation.
+SUITE_source_date_epoch() {
+ # -------------------------------------------------------------------------
+ TEST "Without temporal macro"
+
+ # NOTE(review): presumably the harness sets CCACHE_NODIRECT by default and
+ # unsetting it allows direct hits to be counted -- confirm in the test runner.
+ unset CCACHE_NODIRECT
+
+ # First compilation populates the cache.
+ SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c without_temporal_macros.c
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ # Same epoch: expected to be a direct hit.
+ SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c without_temporal_macros.c
+ expect_stat 'cache hit (direct)' 1
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ # Different epoch: still expected to be a direct hit since the source uses
+ # no temporal macro.
+ SOURCE_DATE_EPOCH=2 $CCACHE_COMPILE -c without_temporal_macros.c
+ expect_stat 'cache hit (direct)' 2
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ # -------------------------------------------------------------------------
+ TEST "With __DATE__ macro"
+
+ unset CCACHE_NODIRECT
+
+ SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_date_macro.c
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ # Same epoch: expected to be a direct hit.
+ SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_date_macro.c
+ expect_stat 'cache hit (direct)' 1
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ # Different epoch: no new direct hit is expected, but a preprocessed hit is
+ # (presumably because epochs 1 and 2 fall on the same date, so __DATE__
+ # expands identically -- confirm).
+ SOURCE_DATE_EPOCH=2 $CCACHE_COMPILE -c with_date_macro.c
+ expect_stat 'cache hit (direct)' 1
+ expect_stat 'cache hit (preprocessed)' 1
+ expect_stat 'cache miss' 1
+
+ # -------------------------------------------------------------------------
+ TEST "With __TIME__ macro"
+
+ unset CCACHE_NODIRECT
+
+ SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_time_macro.c
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ # Same epoch: expected to hit via the preprocessor only; no direct hit is
+ # expected for a source using __TIME__.
+ SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_time_macro.c
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 1
+ expect_stat 'cache miss' 1
+
+ # Different epoch: expected to be a second miss.
+ SOURCE_DATE_EPOCH=2 $CCACHE_COMPILE -c with_time_macro.c
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 1
+ expect_stat 'cache miss' 2
+
+ # -------------------------------------------------------------------------
+ TEST "With __TIME__ and time_macros sloppiness"
+
+ unset CCACHE_NODIRECT
+
+ CCACHE_SLOPPINESS=time_macros SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_time_macro.c
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ # With time_macros sloppiness a direct hit is expected for the same epoch ...
+ CCACHE_SLOPPINESS=time_macros SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_time_macro.c
+ expect_stat 'cache hit (direct)' 1
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ # ... and also for a different epoch.
+ CCACHE_SLOPPINESS=time_macros SOURCE_DATE_EPOCH=2 $CCACHE_COMPILE -c with_time_macro.c
+ expect_stat 'cache hit (direct)' 2
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+
+ # Without the sloppiness, the result stored for epoch 1 is expected to be
+ # found via the preprocessor rather than as a direct hit.
+ SOURCE_DATE_EPOCH=1 $CCACHE_COMPILE -c with_time_macro.c
+ expect_stat 'cache hit (direct)' 2
+ expect_stat 'cache hit (preprocessed)' 1
+ expect_stat 'cache miss' 1
+}
elif [ ! -f reference.dwo ] && [ -f test.dwo ]; then
test_failed ".dwo not missing"
fi
+
+ # -------------------------------------------------------------------------
+ TEST "Object file without dot"
+
+ $CCACHE_COMPILE -gsplit-dwarf -c test.c -o test
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+ expect_exists test.dwo
+
+ rm test.dwo
+
+ $CCACHE_COMPILE -gsplit-dwarf -c test.c -o test
+ expect_stat 'cache hit (direct)' 1
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+ expect_exists test.dwo
+
+ # -------------------------------------------------------------------------
+ TEST "Object file with two dots"
+
+ $CCACHE_COMPILE -gsplit-dwarf -c test.c -o test.x.y
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+ expect_exists test.x.dwo
+
+ rm test.x.dwo
+
+ $CCACHE_COMPILE -gsplit-dwarf -c test.c -o test.x.y
+ expect_stat 'cache hit (direct)' 1
+ expect_stat 'cache hit (preprocessed)' 0
+ expect_stat 'cache miss' 1
+ expect_exists test.x.dwo
}
endif()
if(WIN32)
- list(APPEND source_files test_Win32Util.cpp)
+ list(APPEND source_files test_bsdmkstemp.cpp test_Win32Util.cpp)
endif()
add_executable(unittest ${source_files})
unittest
PRIVATE standard_settings standard_warnings ccache_lib third_party_lib)
-target_include_directories(unittest PRIVATE ${CMAKE_BINARY_DIR} . ../src)
+target_include_directories(unittest PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${ccache_SOURCE_DIR}/src)
add_test(NAME unittest COMMAND unittest)
-// Copyright (C) 2011-2020 Joel Rosdahl and other contributors
+// Copyright (C) 2011-2021 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "../src/Config.hpp"
+#include "../src/Sloppiness.hpp"
#include "../src/Util.hpp"
-#include "../src/ccache.hpp"
#include "../src/exceptions.hpp"
#include "../src/fmtmacros.hpp"
#include "TestUtil.hpp"
CHECK(config.compression_level() == 0);
CHECK(config.cpp_extension().empty());
CHECK(!config.debug());
+ CHECK(config.debug_dir().empty());
CHECK(!config.depend_mode());
CHECK(config.direct_mode());
CHECK(!config.disable());
"compression_level = 8\n"
"cpp_extension = ce\n"
"debug = false\n"
+ "debug_dir = /dd\n"
"depend_mode = true\n"
"direct_mode = false\n"
"disable = true\n"
"(test.conf) compression_level = 8",
"(test.conf) cpp_extension = ce",
"(test.conf) debug = false",
+ "(test.conf) debug_dir = /dd",
"(test.conf) depend_mode = true",
"(test.conf) direct_mode = false",
"(test.conf) disable = true",
// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "../src/Counters.hpp"
-#include "../src/Statistics.hpp"
+#include "../src/Statistic.hpp"
#include "TestUtil.hpp"
#include "third_party/doctest.h"
CHECK(!Stat::lstat("test.lock"));
}
+// Acquiring a lock for a path whose parent directories (a/b/c) have not been
+// created by the test is expected to succeed and to leave the lock entry
+// a/b/c/test.lock in place while the Lockfile object is alive.
+TEST_CASE("Lockfile creates missing directories")
+{
+ TestContext test_context;
+
+ Lockfile lock("a/b/c/test", 1000);
+ CHECK(lock.acquired());
+ CHECK(Stat::lstat("a/b/c/test.lock"));
+}
+
#ifndef _WIN32
TEST_CASE("Lockfile breaking")
{
// this program; if not, write to the Free Software Foundation, Inc., 51
// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#include "../src/Statistic.hpp"
#include "../src/Statistics.hpp"
#include "../src/Util.hpp"
#include "../src/fmtmacros.hpp"
CHECK(Util::dir_name("/") == "/");
CHECK(Util::dir_name("/foo") == "/");
CHECK(Util::dir_name("/foo/bar/f.txt") == "/foo/bar");
+
+#ifdef _WIN32
+ CHECK(Util::dir_name("C:/x/y") == "C:/x");
+ CHECK(Util::dir_name("X:/x/y") == "X:/x");
+ CHECK(Util::dir_name("C:\\x\\y") == "C:\\x");
+ CHECK(Util::dir_name("C:/x") == "C:/");
+ CHECK(Util::dir_name("C:\\x") == "C:\\");
+ CHECK(Util::dir_name("C:/") == "C:/");
+ CHECK(Util::dir_name("C:\\") == "C:\\");
+#endif
}
TEST_CASE("Util::strip_ansi_csi_seqs")
Util::write_file("0/1/file_c", "12");
Util::write_file("0/f/c/file_d", "123");
- std::vector<std::shared_ptr<CacheFile>> files;
auto null_receiver = [](double) {};
SUBCASE("nonexistent subdirectory")
{
- Util::get_level_1_files("2", null_receiver, files);
+ const auto files = Util::get_level_1_files("2", null_receiver);
CHECK(files.empty());
}
SUBCASE("empty subdirectory")
{
- Util::get_level_1_files("e", null_receiver, files);
+ const auto files = Util::get_level_1_files("e", null_receiver);
CHECK(files.empty());
}
SUBCASE("simple case")
{
- Util::get_level_1_files("0", null_receiver, files);
+ auto files = Util::get_level_1_files("0", null_receiver);
REQUIRE(files.size() == 4);
// Files within a level are in arbitrary order, sort them to be able to
// verify them.
- std::sort(files.begin(),
- files.end(),
- [](const std::shared_ptr<CacheFile>& f1,
- const std::shared_ptr<CacheFile>& f2) {
- return f1->path() < f2->path();
- });
-
- CHECK(files[0]->path() == os_path("0/1/file_b"));
- CHECK(files[0]->lstat().size() == 1);
- CHECK(files[1]->path() == os_path("0/1/file_c"));
- CHECK(files[1]->lstat().size() == 2);
- CHECK(files[2]->path() == os_path("0/f/c/file_d"));
- CHECK(files[2]->lstat().size() == 3);
- CHECK(files[3]->path() == os_path("0/file_a"));
- CHECK(files[3]->lstat().size() == 0);
+ std::sort(
+ files.begin(), files.end(), [](const CacheFile& f1, const CacheFile& f2) {
+ return f1.path() < f2.path();
+ });
+
+ CHECK(files[0].path() == os_path("0/1/file_b"));
+ CHECK(files[0].lstat().size() == 1);
+ CHECK(files[1].path() == os_path("0/1/file_c"));
+ CHECK(files[1].lstat().size() == 2);
+ CHECK(files[2].path() == os_path("0/f/c/file_d"));
+ CHECK(files[2].lstat().size() == 3);
+ CHECK(files[3].path() == os_path("0/file_a"));
+ CHECK(files[3].lstat().size() == 0);
}
}
== "/zz/ccache/A/B/C/D/EF.suffix");
}
+// Checks Util::hard_link for three situations: a fresh destination, an
+// already existing destination and a missing source file.
+TEST_CASE("Util::hard_link")
+{
+ TestContext test_context;
+
+ SUBCASE("Link file to nonexistent destination")
+ {
+ Util::write_file("old", "content");
+ CHECK_NOTHROW(Util::hard_link("old", "new"));
+ CHECK(Util::read_file("new") == "content");
+ }
+
+ SUBCASE("Link file to existing destination")
+ {
+ // An existing destination is expected to be replaced: afterwards "new"
+ // must have the content of "old", not its previous content.
+ Util::write_file("old", "content");
+ Util::write_file("new", "other content");
+ CHECK_NOTHROW(Util::hard_link("old", "new"));
+ CHECK(Util::read_file("new") == "content");
+ }
+
+ SUBCASE("Link nonexistent file")
+ {
+ // Neither file is created in this subcase, so the source is missing and an
+ // Error is expected.
+ CHECK_THROWS_AS(Util::hard_link("old", "new"), Error);
+ }
+}
+
TEST_CASE("Util::int_to_big_endian")
{
uint8_t bytes[8];
#endif
}
+// Checks Util::make_relative_path. Judging from the arguments used below, the
+// parameters are (base directory, actual CWD, apparent CWD, path) --
+// NOTE(review): confirm the parameter names against the declaration.
+TEST_CASE("Util::make_relative_path")
+{
+ using Util::make_relative_path;
+
+ const TestContext test_context;
+
+ // Fixture: directory "d" is the actual CWD. On non-Windows a symlink "s"
+ // pointing to "d" provides a different apparent CWD; on Windows both are the
+ // same path.
+ const std::string cwd = Util::get_actual_cwd();
+ const std::string actual_cwd = FMT("{}/d", cwd);
+#ifdef _WIN32
+ const std::string apparent_cwd = actual_cwd;
+#else
+ const std::string apparent_cwd = FMT("{}/s", cwd);
+#endif
+
+ REQUIRE(Util::create_dir("d"));
+#ifndef _WIN32
+ REQUIRE(symlink("d", "s") == 0);
+#endif
+ REQUIRE(chdir("d") == 0);
+ Util::setenv("PWD", apparent_cwd);
+
+ SUBCASE("No base directory")
+ {
+ // With an empty base directory the path is expected back unchanged.
+ CHECK(make_relative_path("", "/a", "/a", "/a/x") == "/a/x");
+ }
+
+ SUBCASE("Path matches neither actual nor apparent CWD")
+ {
+#ifdef _WIN32
+ CHECK(make_relative_path("C:/", "C:/a", "C:/b", "C:/x") == "C:/x");
+#else
+ CHECK(make_relative_path("/", "/a", "/b", "/x") == "/x");
+#endif
+ }
+
+ SUBCASE("Match of actual CWD")
+ {
+#ifdef _WIN32
+ // The first three characters of the CWD (e.g. "C:/") serve as the base
+ // directory.
+ CHECK(
+ make_relative_path(
+ actual_cwd.substr(0, 3), actual_cwd, apparent_cwd, actual_cwd + "/x")
+ == "./x");
+#else
+ CHECK(make_relative_path("/", actual_cwd, apparent_cwd, actual_cwd + "/x")
+ == "./x");
+#endif
+ }
+
+#ifndef _WIN32
+ // The remaining subcases rely on the symlink and are therefore compiled out
+ // on Windows.
+ SUBCASE("Match of apparent CWD")
+ {
+ CHECK(make_relative_path("/", actual_cwd, apparent_cwd, apparent_cwd + "/x")
+ == "./x");
+ }
+
+ SUBCASE("Match if using resolved (using realpath(3)) path")
+ {
+ // The actual CWD is passed for both CWD arguments while the path goes
+ // through the symlink.
+ CHECK(make_relative_path("/", actual_cwd, actual_cwd, apparent_cwd + "/x")
+ == "./x");
+ }
+#endif
+}
+
TEST_CASE("Util::matches_dir_prefix_or_file")
{
CHECK(!Util::matches_dir_prefix_or_file("", ""));
#include "../src/Args.hpp"
#include "../src/Config.hpp"
#include "../src/Context.hpp"
-#include "../src/Statistics.hpp"
+#include "../src/Statistic.hpp"
#include "../src/Util.hpp"
#include "../src/fmtmacros.hpp"
#include "TestUtil.hpp"
--- /dev/null
+// Copyright (C) 2020-2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "../src/Fd.hpp"
+#include "../src/Finalizer.hpp"
+#include "TestUtil.hpp"
+
+#include "third_party/doctest.h"
+#include "third_party/win32/mktemp.h"
+
+#include <algorithm>
+#include <memory>
+#include <ostream>
+#include <sddl.h>
+#include <utility>
+
+using TestUtil::TestContext;
+
+namespace {
+
+// Owner of a Win32 HANDLE that calls CloseHandle on destruction unless the
+// handle equals INVALID_HANDLE_VALUE. Declaring the move constructor
+// suppresses the implicit copy constructor, so instances can only be moved.
+class ScopedHANDLE
+{
+public:
+  ScopedHANDLE() = default;
+
+  explicit ScopedHANDLE(HANDLE handle) : m_handle(handle)
+  {
+  }
+
+  // Take over the handle of `other`, leaving `other` empty.
+  ScopedHANDLE(ScopedHANDLE&& other) : m_handle(other.release())
+  {
+  }
+
+  ~ScopedHANDLE()
+  {
+    if (*this) {
+      CloseHandle(m_handle);
+    }
+  }
+
+  // The argument is taken by value and the handles are exchanged, so the
+  // previously owned handle is closed when `rhs` goes out of scope.
+  ScopedHANDLE&
+  operator=(ScopedHANDLE rhs)
+  {
+    const HANDLE previous = m_handle;
+    m_handle = rhs.m_handle;
+    rhs.m_handle = previous;
+    return *this;
+  }
+
+  // True when a handle other than INVALID_HANDLE_VALUE is held.
+  explicit operator bool() const
+  {
+    return m_handle != INVALID_HANDLE_VALUE;
+  }
+
+  // Access the handle without giving up ownership.
+  HANDLE
+  get() const
+  {
+    return m_handle;
+  }
+
+  // Give up ownership: the caller receives the handle and this object becomes
+  // empty.
+  HANDLE
+  release()
+  {
+    const HANDLE released = m_handle;
+    m_handle = INVALID_HANDLE_VALUE;
+    return released;
+  }
+
+private:
+  HANDLE m_handle = INVALID_HANDLE_VALUE;
+};
+
+} // namespace
+
+TEST_SUITE_BEGIN("bsd_mkstemp");
+
+// Exercises bsd_mkstemp with a deterministic random source so that the
+// generated names are predictable: the first candidate is "AAAAAA" and the
+// next one "BBBBBB".
+//
+// Failures of Win32 API calls are reported via GetLastError() since those
+// functions do not set errno; errno is only reported for bsd_mkstemp itself.
+TEST_CASE("bsd_mkstemp")
+{
+  TestContext test_context;
+
+  // The counter has static storage so that the lambda below stays captureless
+  // and can be converted to the plain function pointer that
+  // bsd_mkstemp_set_random_source expects. It is reset on every entry since
+  // doctest re-runs the test case body for each subcase.
+  static uint16_t rand_iter;
+  rand_iter = 0;
+
+  // Each call fills the buffer with 16-bit words equal to the call number
+  // (0, 1, 2, ...).
+  bsd_mkstemp_set_random_source([](void* buf, size_t nbytes) {
+    std::fill_n(
+      static_cast<uint16_t*>(buf), nbytes / sizeof(uint16_t), rand_iter);
+    ++rand_iter;
+  });
+
+  Finalizer reset_random_source([] { bsd_mkstemp_set_random_source(nullptr); });
+
+  SUBCASE("successful")
+  {
+    std::string path = "XXXXXX";
+    CHECK_MESSAGE(Fd(bsd_mkstemp(&path[0])), "errno=" << errno);
+    CHECK(path == "AAAAAA");
+  }
+
+  SUBCASE("existing file")
+  {
+    // The temporary ScopedHANDLE closes the handle at the end of the
+    // statement; the file itself stays on disk and occupies the first
+    // candidate name.
+    CHECK_MESSAGE(ScopedHANDLE(CreateFileA("AAAAAA",
+                                           GENERIC_READ | GENERIC_WRITE,
+                                           0,
+                                           nullptr,
+                                           CREATE_NEW,
+                                           FILE_ATTRIBUTE_NORMAL,
+                                           nullptr)),
+                  "GetLastError()=" << GetLastError());
+
+    std::string path = "XXXXXX";
+    CHECK_MESSAGE(Fd(bsd_mkstemp(&path[0])), "errno=" << errno);
+    CHECK(path == "BBBBBB");
+  }
+
+  SUBCASE("existing file, pending delete")
+  {
+    // The handle is kept open in `h` for the rest of the subcase.
+    ScopedHANDLE h;
+    CHECK_MESSAGE(
+      (h = ScopedHANDLE(CreateFileA("AAAAAA",
+                                    GENERIC_READ | GENERIC_WRITE | DELETE,
+                                    0,
+                                    nullptr,
+                                    CREATE_NEW,
+                                    FILE_ATTRIBUTE_NORMAL,
+                                    nullptr))),
+      "GetLastError()=" << GetLastError());
+
+    // Mark file as deleted. This puts it into a "pending delete" state that
+    // will persist until the handle is closed.
+    FILE_DISPOSITION_INFO info{};
+    info.DeleteFile = TRUE;
+    CHECK_MESSAGE(SetFileInformationByHandle(
+                    h.get(), FileDispositionInfo, &info, sizeof(info)),
+                  "GetLastError()=" << GetLastError());
+
+    std::string path = "XXXXXX";
+    CHECK_MESSAGE(Fd(bsd_mkstemp(&path[0])), "errno=" << errno);
+    CHECK(path == "BBBBBB");
+  }
+
+  SUBCASE("existing directory")
+  {
+    CHECK_MESSAGE(CreateDirectoryA("AAAAAA", nullptr),
+                  "GetLastError()=" << GetLastError());
+
+    std::string path = "XXXXXX";
+    CHECK_MESSAGE(Fd(bsd_mkstemp(&path[0])), "errno=" << errno);
+    CHECK(path == "BBBBBB");
+  }
+
+  SUBCASE("permission denied")
+  {
+    // Convert an SDDL string into a security descriptor owned by a
+    // shared_ptr that releases it with LocalFree. If the conversion fails,
+    // `desc` stays null and the returned pointer is empty.
+    auto make_ACL = [](const char* acl_string) {
+      PSECURITY_DESCRIPTOR desc = nullptr;
+      ConvertStringSecurityDescriptorToSecurityDescriptorA(
+        acl_string, SDDL_REVISION_1, &desc, nullptr);
+      return std::shared_ptr<SECURITY_DESCRIPTOR>(
+        static_cast<SECURITY_DESCRIPTOR*>(desc), &LocalFree);
+    };
+
+    // Create a directory with a contrived ACL that denies creation of new files
+    // and directories to the "Everybody" (WD) group.
+    std::shared_ptr<SECURITY_DESCRIPTOR> desc;
+    CHECK_MESSAGE((desc = make_ACL("D:(D;;DCLCRPCR;;;WD)(A;;FA;;;WD)")),
+                  "GetLastError()=" << GetLastError());
+
+    SECURITY_ATTRIBUTES attrs{};
+    attrs.nLength = sizeof(attrs);
+    attrs.lpSecurityDescriptor = desc.get();
+    CHECK_MESSAGE(CreateDirectoryA("my_readonly_dir", &attrs),
+                  "GetLastError()=" << GetLastError());
+
+    // Sanity check that we cannot write to this directory. (E.g. Wine doesn't
+    // appear to emulate Windows ACLs properly when run under root.)
+    bool broken_acls = static_cast<bool>(ScopedHANDLE(
+      CreateFileA("my_readonly_dir/.writable",
+                  GENERIC_WRITE,
+                  0,
+                  nullptr,
+                  CREATE_ALWAYS,
+                  FILE_ATTRIBUTE_NORMAL | FILE_FLAG_DELETE_ON_CLOSE,
+                  nullptr)));
+
+    if (!broken_acls) {
+      std::string path = "my_readonly_dir/XXXXXX";
+      CHECK(!Fd(bsd_mkstemp(&path[0])));
+      CHECK(errno == EACCES);
+    } else {
+      MESSAGE("ACLs do not appear to function properly on this filesystem");
+    }
+  }
+}
+
+TEST_SUITE_END();
// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "../src/Context.hpp"
+#include "../src/Sloppiness.hpp"
#include "../src/ccache.hpp"
#include "../src/fmtmacros.hpp"
#include "TestUtil.hpp"