From f05c95f10fc1d8171071735af8ad3a9e87633120 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Wed, 6 Oct 2021 14:03:48 -0700 Subject: [PATCH] [flang] More work on SYSTEM_CLOCK runtime API and implementation To get proper wrap-around behavior for the various kind parameter values of the optional COUNT= and COUNT_MAX= dummy arguments to the intrinsic subroutine SYSTEM_CLOCK, add an extra argument to the APIs for lowering to pass the integer kind of the actual argument. Avoid confusion by requiring that both actual arguments have the same kind when both are present. The results of the runtime functions remain std::int64_t and lowering should still convert them before storing to the actual argument variables. Rework the implementation a bit to accommodate the dynamic specification of the kind parameter, and to clean up some coding issues with preprocessing and templates. Use the kind of the COUNT=/COUNT_MAX= actual arguments to determine the clock's resolution, where possible, in conformance with other Fortran implementations. Differential Revision: https://reviews.llvm.org/D111281 --- flang/docs/Extensions.md | 7 ++ flang/include/flang/Runtime/time-intrinsic.h | 14 ++- flang/lib/Evaluate/intrinsics.cpp | 4 +- flang/runtime/time-intrinsic.cpp | 157 ++++++++++++++------------- flang/unittests/Runtime/Time.cpp | 45 ++++++-- 5 files changed, 133 insertions(+), 94 deletions(-) diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md index f35954b..ac293ce 100644 --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -63,6 +63,13 @@ end not the bounds of the implied DO loop. It is not advisable to use an object of the same name as the index variable in a bounds expression, but it will work, instead of being needlessly undefined. 
+* If both the `COUNT=` and the `COUNT_MAX=` optional arguments are + present on the same call to the intrinsic subroutine `SYSTEM_CLOCK`, + we require that their types have the same integer kind, since the + kind of these arguments is used to select the clock rate. + In common with some other compilers, the clock is in milliseconds + for kinds <= 4 and nanoseconds otherwise where the target system + supports these rates. ## Extensions, deletions, and legacy features supported by default diff --git a/flang/include/flang/Runtime/time-intrinsic.h b/flang/include/flang/Runtime/time-intrinsic.h index c48e961..4600099 100644 --- a/flang/include/flang/Runtime/time-intrinsic.h +++ b/flang/include/flang/Runtime/time-intrinsic.h @@ -12,8 +12,8 @@ #ifndef FORTRAN_RUNTIME_TIME_INTRINSIC_H_ #define FORTRAN_RUNTIME_TIME_INTRINSIC_H_ -#include "flang/Runtime/cpp-type.h" #include "flang/Runtime/entry-names.h" +#include namespace Fortran::runtime { @@ -27,10 +27,14 @@ double RTNAME(CpuTime)(); // Interface for the SYSTEM_CLOCK intrinsic. We break it up into 3 distinct // function calls, one for each of SYSTEM_CLOCK's optional output arguments. -// Lowering will have to cast the results to whatever type it prefers. -CppTypeFor RTNAME(SystemClockCount)(); -CppTypeFor RTNAME(SystemClockCountRate)(); -CppTypeFor RTNAME(SystemClockCountMax)(); +// Lowering converts the results to the types of the actual arguments, +// including the case of a real argument for COUNT_RATE=.. +// The kind argument to SystemClockCount and SystemClockCountMax is the +// kind of the integer actual arguments, which are required to be the same +// when both appear. +std::int64_t RTNAME(SystemClockCount)(int kind = 8); +std::int64_t RTNAME(SystemClockCountRate)(int kind = 8); +std::int64_t RTNAME(SystemClockCountMax)(int kind = 8); // Interface for DATE_AND_TIME intrinsic. 
void RTNAME(DateAndTime)(char *date, std::size_t dateChars, char *time, diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index 2324b78..519f0e6 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -1127,11 +1127,11 @@ static const IntrinsicInterface intrinsicSubroutine[]{ common::Intent::Out}}, {}, Rank::elemental, IntrinsicClass::impureSubroutine}, {"system_clock", - {{"count", AnyInt, Rank::scalar, Optionality::optional, + {{"count", SameInt, Rank::scalar, Optionality::optional, common::Intent::Out}, {"count_rate", AnyIntOrReal, Rank::scalar, Optionality::optional, common::Intent::Out}, - {"count_max", AnyInt, Rank::scalar, Optionality::optional, + {"count_max", SameInt, Rank::scalar, Optionality::optional, common::Intent::Out}}, {}, Rank::elemental, IntrinsicClass::impureSubroutine}, }; diff --git a/flang/runtime/time-intrinsic.cpp b/flang/runtime/time-intrinsic.cpp index 7a70f52..f7ef5be 100644 --- a/flang/runtime/time-intrinsic.cpp +++ b/flang/runtime/time-intrinsic.cpp @@ -11,6 +11,7 @@ #include "flang/Runtime/time-intrinsic.h" #include "terminator.h" #include "tools.h" +#include "flang/Runtime/cpp-type.h" #include "flang/Runtime/descriptor.h" #include #include @@ -51,18 +52,10 @@ template double GetCpuTime(fallback_implementation) { if (timestamp != static_cast(-1)) { return static_cast(timestamp) / CLOCKS_PER_SEC; } - // Return some negative value to represent failure. return -1.0; } -// POSIX implementation using clock_gettime. This is only enabled if -// clock_gettime is available. -template -double GetCpuTime(preferred_implementation, - // We need some dummy parameters to pass to decltype(clock_gettime). 
- T ClockId = 0, U *Timespec = nullptr, - decltype(clock_gettime(ClockId, Timespec)) *Enabled = nullptr) { #if defined CLOCK_THREAD_CPUTIME_ID #define CLOCKID CLOCK_THREAD_CPUTIME_ID #elif defined CLOCK_PROCESS_CPUTIME_ID @@ -72,106 +65,119 @@ double GetCpuTime(preferred_implementation, #else #define CLOCKID CLOCK_REALTIME #endif + +// POSIX implementation using clock_gettime. This is only enabled where +// clock_gettime is available. +template +double GetCpuTime(preferred_implementation, + // We need some dummy parameters to pass to decltype(clock_gettime). + T ClockId = 0, U *Timespec = nullptr, + decltype(clock_gettime(ClockId, Timespec)) *Enabled = nullptr) { struct timespec tspec; if (clock_gettime(CLOCKID, &tspec) == 0) { return tspec.tv_nsec * 1.0e-9 + tspec.tv_sec; } - // Return some negative value to represent failure. return -1.0; } -using count_t = - Fortran::runtime::CppTypeFor; +using count_t = std::int64_t; +using unsigned_count_t = std::uint64_t; + +// Computes HUGE(INT(0,kind)) as an unsigned integer value. +static constexpr inline unsigned_count_t GetHUGE(int kind) { + if (kind > 8) { + kind = 8; + } + return (unsigned_count_t{1} << ((8 * kind) - 1)) - 1; +} // This is the fallback implementation, which should work everywhere. Note that // in general we can't recover after std::clock has reached its maximum value. template -count_t GetSystemClockCount(fallback_implementation) { +count_t GetSystemClockCount(int kind, fallback_implementation) { std::clock_t timestamp{std::clock()}; if (timestamp == static_cast(-1)) { - // Return -HUGE() to represent failure. - return -std::numeric_limits::max(); + // Return -HUGE(COUNT) to represent failure. + return -static_cast(GetHUGE(kind)); } - - // If our return type is large enough to hold any value returned by - // std::clock, our work is done. Otherwise, we have to wrap around. 
- static constexpr auto max{std::numeric_limits::max()}; - if constexpr (std::numeric_limits::max() <= max) { - return static_cast(timestamp); - } else { - // Since std::clock_t could be a floating point type, we can't just use the - // % operator, so we have to wrap around manually. - return static_cast(timestamp - max * std::floor(timestamp / max)); + // Convert the timestamp to std::uint64_t with wrap-around. The timestamp is + // most likely a floating-point value (since C'11), so compute the modulus + // carefully when one is required. + constexpr auto maxUnsignedCount{std::numeric_limits::max()}; + if constexpr (std::numeric_limits::max() > maxUnsignedCount) { + timestamp -= maxUnsignedCount * std::floor(timestamp / maxUnsignedCount); } + unsigned_count_t unsignedCount{static_cast(timestamp)}; + // Return the modulus of the unsigned integral count with HUGE(COUNT)+1. + // The result is a signed integer but never negative. + return static_cast(unsignedCount % (GetHUGE(kind) + 1)); } template -count_t GetSystemClockCountRate(fallback_implementation) { +count_t GetSystemClockCountRate(int kind, fallback_implementation) { return CLOCKS_PER_SEC; } template -count_t GetSystemClockCountMax(fallback_implementation) { - static constexpr auto max_clock_t = std::numeric_limits::max(); - static constexpr auto max_count_t = std::numeric_limits::max(); - if constexpr (max_clock_t < max_count_t) { - return static_cast(max_clock_t); - } else { - return max_count_t; - } +count_t GetSystemClockCountMax(int kind, fallback_implementation) { + constexpr auto max_clock_t{std::numeric_limits::max()}; + unsigned_count_t maxCount{GetHUGE(kind)}; + return max_clock_t <= maxCount ? static_cast(max_clock_t) + : static_cast(maxCount); } -constexpr count_t NSECS_PER_SEC{1'000'000'000}; +// POSIX implementation using clock_gettime. This is only enabled where +// clock_gettime is available. 
Use a millisecond CLOCK_RATE for kinds +// of COUNT/COUNT_MAX less than 64 bits, and nanoseconds otherwise. +constexpr unsigned_count_t MILLIS_PER_SEC{1'000u}; +constexpr unsigned_count_t NSECS_PER_SEC{1'000'000'000u}; +constexpr unsigned_count_t maxSecs{ + std::numeric_limits::max() / NSECS_PER_SEC}; + +// Use a millisecond clock rate for smaller COUNT= kinds. +static inline unsigned_count_t ScaleResult(unsigned_count_t nsecs, int kind) { + return kind >= 8 ? nsecs : nsecs / (NSECS_PER_SEC / MILLIS_PER_SEC); +} -// POSIX implementation using clock_gettime. This is only enabled if -// clock_gettime is available. template -count_t GetSystemClockCount(preferred_implementation, +count_t GetSystemClockCount(int kind, preferred_implementation, // We need some dummy parameters to pass to decltype(clock_gettime). T ClockId = 0, U *Timespec = nullptr, decltype(clock_gettime(ClockId, Timespec)) *Enabled = nullptr) { -#if defined CLOCK_THREAD_CPUTIME_ID -#define CLOCKID CLOCK_THREAD_CPUTIME_ID -#elif defined CLOCK_PROCESS_CPUTIME_ID -#define CLOCKID CLOCK_PROCESS_CPUTIME_ID -#elif defined CLOCK_MONOTONIC -#define CLOCKID CLOCK_MONOTONIC -#else -#define CLOCKID CLOCK_REALTIME -#endif struct timespec tspec; if (clock_gettime(CLOCKID, &tspec) != 0) { // Return -HUGE() to represent failure. - return -std::numeric_limits::max(); + return -GetHUGE(kind); } - // Wrap around to avoid overflows. - constexpr count_t max_secs{ - std::numeric_limits::max() / NSECS_PER_SEC}; - count_t wrapped_secs{tspec.tv_sec % max_secs}; - - // At this point, wrapped_secs < max_secs, and max_secs has already been - // truncated by the division. Therefore, we should still have enough room to - // add tv_nsec, since it is < NSECS_PER_SEC. 
- return tspec.tv_nsec + wrapped_secs * NSECS_PER_SEC; + unsigned_count_t wrappedSecs{ + static_cast(tspec.tv_sec) % maxSecs}; + unsigned_count_t unsignedNsecs{static_cast(tspec.tv_nsec) + + wrappedSecs * NSECS_PER_SEC}; + unsigned_count_t unsignedCount{ScaleResult(unsignedNsecs, kind)}; + // Return the modulus of the unsigned integral count with HUGE(COUNT)+1. + // The result is a signed integer but never negative. + return static_cast(unsignedCount % (GetHUGE(kind) + 1)); } template -count_t GetSystemClockCountRate(preferred_implementation, +count_t GetSystemClockCountRate(int kind, preferred_implementation, // We need some dummy parameters to pass to decltype(clock_gettime). T ClockId = 0, U *Timespec = nullptr, decltype(clock_gettime(ClockId, Timespec)) *Enabled = nullptr) { - return NSECS_PER_SEC; + return kind >= 8 ? static_cast(NSECS_PER_SEC) : MILLIS_PER_SEC; } template -count_t GetSystemClockCountMax(preferred_implementation, +count_t GetSystemClockCountMax(int kind, preferred_implementation, // We need some dummy parameters to pass to decltype(clock_gettime). T ClockId = 0, U *Timespec = nullptr, decltype(clock_gettime(ClockId, Timespec)) *Enabled = nullptr) { - count_t max_secs{std::numeric_limits::max() / NSECS_PER_SEC}; - return max_secs * NSECS_PER_SEC - 1; + unsigned_count_t maxClockNsec{maxSecs * NSECS_PER_SEC + NSECS_PER_SEC - 1}; + unsigned_count_t maxClock{ScaleResult(maxClockNsec, kind)}; + unsigned_count_t maxCount{GetHUGE(kind)}; + return static_cast(maxClock <= maxCount ? maxClock : maxCount); } // DATE_AND_TIME (Fortran 2018 16.9.59) @@ -198,7 +204,7 @@ template struct StoreNegativeHugeAt { // Default implementation when date and time information is not available (set // strings to blanks and values to -HUGE as defined by the standard). 
-void DateAndTimeUnavailable(Fortran::runtime::Terminator &terminator, +static void DateAndTimeUnavailable(Fortran::runtime::Terminator &terminator, char *date, std::size_t dateChars, char *time, std::size_t timeChars, char *zone, std::size_t zoneChars, const Fortran::runtime::Descriptor *values) { @@ -259,9 +265,9 @@ template struct GmtOffsetHelper { }; }; -// Dispatch to posix implemetation when gettimeofday and localtime_r are +// Dispatch to posix implementation where gettimeofday and localtime_r are // available. -void GetDateAndTime(Fortran::runtime::Terminator &terminator, char *date, +static void GetDateAndTime(Fortran::runtime::Terminator &terminator, char *date, std::size_t dateChars, char *time, std::size_t timeChars, char *zone, std::size_t zoneChars, const Fortran::runtime::Descriptor *values) { @@ -330,9 +336,9 @@ void GetDateAndTime(Fortran::runtime::Terminator &terminator, char *date, } #else -// Fallback implementation when gettimeofday or localtime_r is not available -// (e.g. windows). -void GetDateAndTime(Fortran::runtime::Terminator &terminator, char *date, +// Fallback implementation where gettimeofday or localtime_r are not both +// available (e.g. windows). +static void GetDateAndTime(Fortran::runtime::Terminator &terminator, char *date, std::size_t dateChars, char *time, std::size_t timeChars, char *zone, std::size_t zoneChars, const Fortran::runtime::Descriptor *values) { // TODO: An actual implementation for non Posix system should be added. 
@@ -342,26 +348,23 @@ void GetDateAndTime(Fortran::runtime::Terminator &terminator, char *date, terminator, date, dateChars, time, timeChars, zone, zoneChars, values); } #endif -} // anonymous namespace +} // namespace namespace Fortran::runtime { extern "C" { double RTNAME(CpuTime)() { return GetCpuTime(0); } -CppTypeFor RTNAME( - SystemClockCount)() { - return GetSystemClockCount(0); +std::int64_t RTNAME(SystemClockCount)(int kind) { + return GetSystemClockCount(kind, 0); } -CppTypeFor RTNAME( - SystemClockCountRate)() { - return GetSystemClockCountRate(0); +std::int64_t RTNAME(SystemClockCountRate)(int kind) { + return GetSystemClockCountRate(kind, 0); } -CppTypeFor RTNAME( - SystemClockCountMax)() { - return GetSystemClockCountMax(0); +std::int64_t RTNAME(SystemClockCountMax)(int kind) { + return GetSystemClockCountMax(kind, 0); } void RTNAME(DateAndTime)(char *date, std::size_t dateChars, char *time, diff --git a/flang/unittests/Runtime/Time.cpp b/flang/unittests/Runtime/Time.cpp index 4720e7e..b6898d4 100644 --- a/flang/unittests/Runtime/Time.cpp +++ b/flang/unittests/Runtime/Time.cpp @@ -31,7 +31,7 @@ TEST(TimeIntrinsics, CpuTime) { } } -using count_t = CppTypeFor; +using count_t = std::int64_t; TEST(TimeIntrinsics, SystemClock) { // We can't really test that we get the "right" result for SYSTEM_CLOCK, but @@ -43,21 +43,46 @@ TEST(TimeIntrinsics, SystemClock) { // SYSTEM_CLOCK. 
EXPECT_GT(RTNAME(SystemClockCountRate)(), 0); - count_t max{RTNAME(SystemClockCountMax)()}; - EXPECT_GT(max, 0); + count_t max1{RTNAME(SystemClockCountMax)(1)}; + EXPECT_GT(max1, 0); + EXPECT_LE(max1, static_cast(0x7f)); + count_t start1{RTNAME(SystemClockCount)(1)}; + EXPECT_GE(start1, 0); + EXPECT_LE(start1, max1); - count_t start{RTNAME(SystemClockCount)()}; - EXPECT_GE(start, 0); - EXPECT_LE(start, max); + count_t max2{RTNAME(SystemClockCountMax)(2)}; + EXPECT_GT(max2, 0); + EXPECT_LE(max2, static_cast(0x7fff)); + count_t start2{RTNAME(SystemClockCount)(2)}; + EXPECT_GE(start2, 0); + EXPECT_LE(start2, max2); + + count_t max4{RTNAME(SystemClockCountMax)(4)}; + EXPECT_GT(max4, 0); + EXPECT_LE(max4, static_cast(0x7fffffff)); + count_t start4{RTNAME(SystemClockCount)(4)}; + EXPECT_GE(start4, 0); + EXPECT_LE(start4, max4); + + count_t max8{RTNAME(SystemClockCountMax)(8)}; + EXPECT_GT(max8, 0); + count_t start8{RTNAME(SystemClockCount)(8)}; + EXPECT_GE(start8, 0); + EXPECT_LT(start8, max8); + + count_t max16{RTNAME(SystemClockCountMax)(16)}; + EXPECT_GT(max16, 0); + count_t start16{RTNAME(SystemClockCount)(16)}; + EXPECT_GE(start16, 0); + EXPECT_LT(start16, max16); // Loop until we get a different value from SystemClockCount. If we don't get // one before we time out, then we should probably look into an implementation // for SystemClokcCount with a better timer resolution on this platform. - for (count_t end = start; end == start; end = RTNAME(SystemClockCount)()) { + for (count_t end{start8}; end == start8; end = RTNAME(SystemClockCount)(8)) { EXPECT_GE(end, 0); - EXPECT_LE(end, max); - - EXPECT_GE(end, start); + EXPECT_LE(end, max8); + EXPECT_GE(end, start8); } } -- 2.7.4