Also use InlinedVector instead of std::vector for non-async path
Before:
Benchmark Time(ns) CPU(ns) Iterations
-------------------------------------------------------------
BM_Execute/0 1895 1898 360200 Execute
BM_Execute/1 1193 1942 358322 ExecuteAsync
BM_ExecuteFunction/0 5812 5825 100000 ExecuteFunction
BM_ExecuteFunction/1 5015 5374 100000 ExecuteFunctionAsync
After:
Benchmark Time(ns) CPU(ns) Iterations
-------------------------------------------------------------
BM_Execute/0 1604 1607 428262 Execute
BM_Execute/1 1150 1765 404821 ExecuteAsync
BM_ExecuteFunction/0 5615 5626 100000 ExecuteFunction
BM_ExecuteFunction/1 5111 5476 100000 ExecuteFunctionAsync
PiperOrigin-RevId:
193218331
opts->async, std::move(device_mgr), r);
}
-void TFE_DeleteContext(TFE_Context* ctx, TF_Status* status) {
- delete ctx;
-}
+void TFE_DeleteContext(TFE_Context* ctx, TF_Status* status) { delete ctx; }
TF_DeviceList* TFE_ContextListDevices(TFE_Context* ctx, TF_Status* status) {
TF_DeviceList* list = new TF_DeviceList;
return nullptr;
}
-
#ifdef TENSORFLOW_EAGER_USE_XLA
// Synthesizes and returns a wrapper function over `op`, which must be a
// primitive op (e.g. matmul).
}
const tensorflow::FunctionDef* fdef;
- {
- fdef = op->ctx->context.FindFunctionDef(op->name);
- }
+ { fdef = op->ctx->context.FindFunctionDef(op->name); }
std::vector<TF_DataType> const_input_types;
std::vector<TF_DataType> arg_input_types;
tensorflow::gtl::FlatMap<int, int> op_input_to_func_input;
} else {
// Execute checks if retvals[i] is nullptr or not to figure if it needs to
// allocate it.
- std::vector<tensorflow::TensorHandle*> handle_retvals(*num_retvals,
- nullptr);
+ tensorflow::gtl::InlinedVector<tensorflow::TensorHandle*, 2> handle_retvals(
+ *num_retvals);
status->status = tensorflow::EagerExecute(
&op->ctx->context, op->device, op->inputs, kernel, maybe_stats.get(),
handle_retvals.data(), *num_retvals);
}
} // namespace tensorflow
-
TFE_Op::~TFE_Op() {
for (tensorflow::TensorHandle* h : inputs) {
h->Unref();
inline tensorflow::Fprint128 CacheKeyHelper(StringPiece s,
const tensorflow::Fprint128& b) {
- // TODO(agarwal): avoid ToString().
- tensorflow::Fprint128 a = tensorflow::Fingerprint128(s.ToString());
+ tensorflow::Fprint128 a = tensorflow::Fingerprint128(s);
return FingerprintCat128(a, b);
}
if (node_def_finalized_) return f;
}
for (const auto& p : string_attrs_) {
- // TODO(agarwal): avoid ToString().
- CombineUnordered(CacheKeyHelper(p.first, tensorflow::Fingerprint128(
- p.second.ToString())),
- &f);
+ CombineUnordered(
+ CacheKeyHelper(p.first, tensorflow::Fingerprint128(p.second)), &f);
}
for (const auto& p : int_attrs_) {
CombineUnordered(CacheKeyHelper(p.first, static_cast<uint64>(p.second)),
namespace tensorflow {
-template <uint64 hash(const string&)>
+template <uint64 hash(StringPiece)>
class StringToHashBucketOp : public OpKernel {
public:
explicit StringToHashBucketOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
#include <farmhash.h>
+#include "tensorflow/core/lib/core/stringpiece.h"
+
namespace tensorflow {
-inline uint64 Fingerprint64(const string& s) {
- return ::util::Fingerprint64(s);
+inline uint64 Fingerprint64(StringPiece s) {
+ return ::util::Fingerprint64(s.data(), s.size());
}
-inline Fprint128 Fingerprint128(const string& s) {
- const auto fingerprint = ::util::Fingerprint128(s);
+inline Fprint128 Fingerprint128(StringPiece s) {
+ const auto fingerprint = ::util::Fingerprint128(s.data(), s.size());
return {::util::Uint128Low64(fingerprint),
::util::Uint128High64(fingerprint)};
}
#ifndef TENSORFLOW_CORE_PLATFORM_FINGERPRINT_H_
#define TENSORFLOW_CORE_PLATFORM_FINGERPRINT_H_
+#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/platform/types.h"
namespace tensorflow {
}
};
-// TODO(sibyl-Mooth6ku): Change these to accept StringPiece (or make them templated
-// on any kind of byte array?).
-
// This is a portable fingerprint interface for strings that will never change.
// However, it is not suitable for cryptography.
-uint64 Fingerprint64(const string& s);
+uint64 Fingerprint64(StringPiece s);
// 128-bit variant of Fingerprint64 above (same properties and caveats apply).
-Fprint128 Fingerprint128(const string& s);
+Fprint128 Fingerprint128(StringPiece s);
namespace internal {
// Mixes some of the bits that got propagated to the high bits back into the