From d454328ea88562a6ec6260529a040035ab9c4a06 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Sat, 17 Oct 2020 08:40:44 -0700
Subject: [PATCH] [ML] Add final reward logging facility.

Allow logging final rewards. A final reward is logged only once, and is
serialized as all-zero values, except for the last one.

Differential Revision: https://reviews.llvm.org/D89626
---
 llvm/include/llvm/Analysis/Utils/TFUtils.h |  5 +++
 llvm/lib/Analysis/TFUtils.cpp              | 55 ++++++++++++++++++------------
 llvm/unittests/Analysis/TFUtilsTest.cpp    | 45 +++++++++++++++++++++++-
 3 files changed, 82 insertions(+), 23 deletions(-)

diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h
index 522dcff..0e697a4 100644
--- a/llvm/include/llvm/Analysis/Utils/TFUtils.h
+++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h
@@ -139,6 +139,11 @@ public:
     logTensorValue(RawLogData.size() - 1, &Value);
   }
 
+  template <typename T> void logFinalReward(T Value) {
+    assert(RawLogData.back().empty());
+    logReward(Value);
+  }
+
   template <typename T>
   void logTensorValue(size_t FeatureID, const T *Value, size_t Size = 1) {
     const char *Start = reinterpret_cast<const char *>(Value);
diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp
index 1357cd9..a84e967 100644
--- a/llvm/lib/Analysis/TFUtils.cpp
+++ b/llvm/lib/Analysis/TFUtils.cpp
@@ -77,10 +77,15 @@ void writeTensorValues(raw_ostream &OutFile, const char *TensorData,
   OutFile << "]";
 }
 
-/// Untyped implementation of the API above.
+/// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs.
+/// The tensors are assumed to be stored contiguously, in row-major format,
+/// in the TensorData buffer. Each tensor has the shape given by Spec. The
+/// feature name in the output is either the provided LoggingName, if
+/// specified, otherwise it's the name of the tensor (as given by Spec).
 void writeRawTensorsAsFeatureLists(raw_ostream &OutFile,
                                    const Logger::LoggedFeatureSpec &LoggedSpec,
-                                   const char *TensorData, size_t TensorCount) {
+                                   const char *TensorData, size_t TensorCount,
+                                   bool FinalReward = false) {
   const char *FieldName = "";
   std::function<void(const char *)> ValueWriter;
   const auto &Spec = LoggedSpec.Spec;
@@ -115,29 +120,31 @@ void writeRawTensorsAsFeatureLists(raw_ostream &OutFile,
           << "\" ";
   OutFile << "value: {\n";
   size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize();
-  for (const char *P = TensorData,
-                  *E = TensorData + TensorByteSize * TensorCount;
-       P < E; P += TensorByteSize) {
+
+  auto WriteFeatureProto = [&](const char *P) {
     OutFile << " feature: { " << FieldName << ": { value: ";
     ValueWriter(P);
     OutFile << " } }\n";
+  };
+
+  const char *CurrentTensor = TensorData;
+  static int64_t Zero = 0;
+  // Write all but the last value. If this is the final reward, don't increment
+  // the CurrentTensor, and just write 0.
+  for (size_t I = 0; I < TensorCount - 1; ++I) {
+    if (FinalReward)
+      WriteFeatureProto(reinterpret_cast<const char *>(&Zero));
+    else {
+      WriteFeatureProto(CurrentTensor);
+      CurrentTensor += TensorByteSize;
+    }
   }
+
+  WriteFeatureProto(CurrentTensor);
+
   OutFile << " }\n";
   OutFile << " }\n";
 }
-
-/// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs.
-/// The tensors are assumed to be stored contiguously, in row-major format,
-/// in the TensorData buffer. Each tensor has the shape given by Spec. The
-/// feature name in the output is either the provided LoggingName, if
-/// specified, otherwise it's the name of the tensor (as given by Spec).
-template <typename T>
-void writeTensorsAsFeatureLists(raw_ostream &OutFile,
-                                const Logger::LoggedFeatureSpec &Spec,
-                                const T *TensorData, size_t TensorCount) {
-  writeRawTensorsAsFeatureLists(
-      OutFile, Spec, reinterpret_cast<const char *>(TensorData), TensorCount);
-}
 } // namespace
 
 namespace llvm {
@@ -405,15 +412,19 @@ void Logger::print(raw_ostream &OS) {
   size_t NumberOfRecords = RawLogData[0].size() / Tensor0Size;
   if (NumberOfRecords == 0)
     return;
+  size_t RewardSize =
+      RewardSpec.getElementCount() * RewardSpec.getElementByteSize();
+  size_t NumberOfRewards = RawLogData.back().size() / RewardSize;
 
   OS << "feature_lists: {\n";
   for (size_t I = 0; I < FeatureSpecs.size(); ++I)
-    writeTensorsAsFeatureLists(OS, FeatureSpecs[I], RawLogData[I].data(),
-                               NumberOfRecords);
+    writeRawTensorsAsFeatureLists(OS, FeatureSpecs[I], RawLogData[I].data(),
+                                  NumberOfRecords);
 
   if (IncludeReward)
-    writeTensorsAsFeatureLists(OS, {RewardSpec, None}, RawLogData.back().data(),
-                               NumberOfRecords);
+    writeRawTensorsAsFeatureLists(OS, {RewardSpec, None},
+                                  RawLogData.back().data(), NumberOfRecords,
+                                  NumberOfRewards == 1);
 
   OS << "}\n";
 }
diff --git a/llvm/unittests/Analysis/TFUtilsTest.cpp b/llvm/unittests/Analysis/TFUtilsTest.cpp
index d2ad977..f07bc27 100644
--- a/llvm/unittests/Analysis/TFUtilsTest.cpp
+++ b/llvm/unittests/Analysis/TFUtilsTest.cpp
@@ -227,4 +227,47 @@ TEST(TFUtilsTest, LoggerNoReward) {
   raw_string_ostream OS(Result);
   L.print(OS);
   EXPECT_EQ(Result, Expected);
-}
\ No newline at end of file
+}
+
+TEST(TFUtilsTest, LoggerFinalReward) {
+  std::vector<Logger::LoggedFeatureSpec> Features;
+  Features.push_back({TensorSpec::createSpec<float>("the_float", {1}), None});
+  Features.push_back({TensorSpec::createSpec<int64_t>("the_int", {1}), None});
+
+  auto Rewards = TensorSpec::createSpec<float>("reward", {1});
+  Logger L(Features, Rewards, true);
+  for (size_t I = 0; I < 3; ++I) {
+    float F = static_cast<float>(I);
+    L.logTensorValue(0, &F);
+    L.logTensorValue(1, &I);
+  }
+  L.logFinalReward<float>(3.14);
+  const auto *Expected = R"(feature_lists: {
+ feature_list: {
+ key: "the_float" value: {
+ feature: { float_list: { value: [0.000000e+00] } }
+ feature: { float_list: { value: [1.000000e+00] } }
+ feature: { float_list: { value: [2.000000e+00] } }
+ }
+ }
+ feature_list: {
+ key: "the_int" value: {
+ feature: { int64_list: { value: [0] } }
+ feature: { int64_list: { value: [1] } }
+ feature: { int64_list: { value: [2] } }
+ }
+ }
+ feature_list: {
+ key: "reward" value: {
+ feature: { float_list: { value: [0.000000e+00] } }
+ feature: { float_list: { value: [0.000000e+00] } }
+ feature: { float_list: { value: [3.140000e+00] } }
+ }
+ }
+}
+)";
+  std::string Result;
+  raw_string_ostream OS(Result);
+  L.print(OS);
+  EXPECT_EQ(Result, Expected);
+}
-- 
2.7.4