#include "caffe2/operators/fused_rowwise_8bit_conversion_ops.h"
-#include <fp16.h>
#include "c10/util/Registry.h"
namespace caffe2 {
+// Widens a buffer of N fp16 values to fp32. at::Half's implicit
+// conversion to float replaces the previous fp16_ieee_to_fp32_value call.
void convertfp16fp32(float* dst, const at::Half* src, size_t N) {
for (size_t i = 0; i < N; i++) {
- dst[i] = fp16_ieee_to_fp32_value(src[i].x);
+ dst[i] = src[i];
}
}
+// Narrows a buffer of N fp32 values to fp16. at::Half's converting
+// assignment replaces the previous fp16_ieee_from_fp32_value + memcpy pair.
void convertfp32fp16(at::Half* dst, const float* src, size_t N) {
for (size_t i = 0; i < N; i++) {
- uint16_t out = fp16_ieee_from_fp32_value(src[i]);
- memcpy(dst + i, &out, sizeof(uint16_t));
+ dst[i] = src[i];
}
}
} // namespace
#include "caffe2/operators/half_float_ops.h"
+#include <c10/util/Half.h>
namespace caffe2 {
+
+// FloatToHalf (CPU): elementwise narrowing of a float32 tensor to an
+// fp16 (at::Half) tensor of the same shape, via at::Half's implicit
+// float conversion.
+template <>
+bool FloatToHalfOp<CPUContext>::RunOnDevice() {
+  auto& input = Input(0);
+
+  auto* output = Output(0, input.sizes(), at::dtype<at::Half>());
+  const float* data = input.template data<float>();
+  at::Half* out = output->template mutable_data<at::Half>();
+  const auto N = input.numel();
+
+  // numel() returns int64_t; use a matching index type so large tensors
+  // do not hit a signed/width mismatch with an int counter.
+  for (int64_t i = 0; i < N; i++) {
+    out[i] = data[i];
+  }
+
+  return true;
+}
+
+// HalfToFloat (CPU): elementwise widening of an fp16 (at::Half) tensor
+// to a float32 tensor of the same shape.
+template <>
+bool HalfToFloatOp<CPUContext>::RunOnDevice() {
+  auto& input = Input(0);
+
+  auto* output = Output(0, input.sizes(), at::dtype<float>());
+  const at::Half* data = input.template data<at::Half>();
+  float* out = output->template mutable_data<float>();
+  const auto N = input.numel();
+
+  // numel() returns int64_t; match the index type (an int counter would
+  // sign/width-mismatch and overflow on very large tensors).
+  for (int64_t i = 0; i < N; i++) {
+    out[i] = data[i];
+  }
+  return true;
+}
+
+// Register the CPU implementations defined above under their operator names.
+REGISTER_CPU_OPERATOR(FloatToHalf, FloatToHalfOp<CPUContext>);
+REGISTER_CPU_OPERATOR(HalfToFloat, HalfToFloatOp<CPUContext>);
+
+// Shape/type inference for FloatToHalf: output keeps the input's shape
+// and becomes FLOAT16. The diff only reformats the lambda and marks the
+// unused OperatorDef parameter.
OPERATOR_SCHEMA(FloatToHalf)
.NumInputs(1)
.NumOutputs(1)
- .TensorInferenceFunction(
- [](const OperatorDef& def, const vector<TensorShape>& in) {
- vector<TensorShape> out;
- const TensorShape& X = in[0];
- out.push_back(X);
- out[0].set_data_type(TensorProto_DataType_FLOAT16);
+ .TensorInferenceFunction([](const OperatorDef& /* unused */,
+ const vector<TensorShape>& in) {
+ vector<TensorShape> out;
+ const TensorShape& X = in[0];
+ out.push_back(X);
+ out[0].set_data_type(TensorProto_DataType_FLOAT16);
- return out;
- });
+ return out;
+ });
+// Shape/type inference for HalfToFloat: output keeps the input's shape
+// and becomes FLOAT. Same lambda reformat as FloatToHalf above.
OPERATOR_SCHEMA(HalfToFloat)
.NumInputs(1)
.NumOutputs(1)
- .TensorInferenceFunction(
- [](const OperatorDef& def, const vector<TensorShape>& in) {
- vector<TensorShape> out;
- const TensorShape& X = in[0];
- out.push_back(X);
- out[0].set_data_type(TensorProto_DataType_FLOAT);
-
- return out;
- });
+ .TensorInferenceFunction([](const OperatorDef& /* unused */,
+ const vector<TensorShape>& in) {
+ vector<TensorShape> out;
+ const TensorShape& X = in[0];
+ out.push_back(X);
+ out[0].set_data_type(TensorProto_DataType_FLOAT);
+
+ return out;
+ });
+
+// Float16ConstantFillOp (CPU): fills a tensor of shape `shape_` with a
+// single fp16 value taken from the "value" argument (default 0.0f).
+bool Float16ConstantFillOp::RunOnDevice() {
+  auto* output = Output(0, shape_, at::dtype<at::Half>());
+
+  // Read the fill value once and narrow it to fp16 up front.
+  const float fillValue =
+      this->template GetSingleArgument<float>("value", 0.0f);
+  const at::Half fillValueFp16 = fillValue;
+
+  const auto numel = output->numel();
+  if (numel > 0) {
+    at::Half* out = output->template mutable_data<at::Half>();
+    std::fill_n(out, numel, fillValueFp16);
+  }
+  return true;
+}
+
+// Float16UniformFillOp (CPU): fills a tensor of shape `shape_` with
+// samples from U[min_, max_]. Samples are drawn row by row into a float
+// staging buffer, then narrowed to fp16.
+bool Float16UniformFillOp::RunOnDevice() {
+  auto* output = Output(0, shape_, at::dtype<at::Half>());
+
+  // Empty output: nothing to fill. This also guards the division below
+  // against a zero leading dimension (0 / 0).
+  if (output->numel() == 0) {
+    return true;
+  }
+  at::Half* out = output->template mutable_data<at::Half>();
+
+  // Get a batch row by row and convert. size(0)/numel() are int64_t, so
+  // keep 64-bit signed arithmetic throughout (an int row size could
+  // overflow, and uint64_t counters sign-mismatch against int64_t).
+  const int64_t leading_dim_sz = output->size(0);
+  const int64_t rowsz = output->numel() / leading_dim_sz;
+
+  vector<float> intermediate_data_;
+  intermediate_data_.resize(rowsz);
+  for (int64_t i = 0; i < leading_dim_sz; i++) {
+    math::RandUniform<float, CPUContext>(
+        rowsz, min_, max_, intermediate_data_.data(), &context_);
+    for (int64_t j = 0; j < rowsz; j++) {
+      out[i * rowsz + j] = intermediate_data_[j];
+    }
+  }
+  return true;
+}
+
+// Registration and schema for the fp16 filler operators defined above.
+REGISTER_CPU_OPERATOR(Float16ConstantFill, Float16ConstantFillOp);
+REGISTER_CPU_OPERATOR(Float16UniformFill, Float16UniformFillOp);
+OPERATOR_SCHEMA(Float16UniformFill)
+    .NumInputs(0)
+    .NumOutputs(1)
+    .TensorInferenceFunction(Float16FillerTensorInference)
+    .SetDoc(
+        "Fills a half float tensor of a specified shape with"
+        " values from a uniform distribution[min,max]")
+    .Arg("shape", "Shape of the tensor")
+    // Fixed typo in the user-facing argument doc: "Minimim" -> "Minimum".
+    .Arg("min", "Minimum value to generate")
+    .Arg("max", "Maximum value to generate");
+NO_GRADIENT(Float16UniformFill);
+
OPERATOR_SCHEMA(Float16ConstantFill)
.NumInputs(0)
.NumOutputs(1)
--- /dev/null
+#include <cmath>
+#include <map>
+
+#include "caffe2/core/flags.h"
+#include "caffe2/core/logging.h"
+#include "caffe2/operators/half_float_ops.h"
+#include "caffe2/utils/conversions.h"
+
+#include <gtest/gtest.h>
+C10_DECLARE_string(caffe_test_root);
+
+namespace caffe2 {
+
+// Round-trips a small float vector through FloatToHalf then HalfToFloat
+// and checks that the values survive within fp16 precision (0.01).
+TEST(Float16, SimpleTest) {
+  Workspace ws;
+  vector<float> data = {0.1f, 0.23f, 1.6f, 8.2f, -13.9f};
+
+  // loading input data
+  Blob* dataBlob = ws.CreateBlob("data");
+  auto tensor = BlobGetMutableTensor(dataBlob, CPU);
+  tensor->Resize(data.size());
+  // numel() is int64_t; match the index type to avoid -Wsign-compare.
+  for (int64_t i = 0; i < tensor->numel(); ++i) {
+    tensor->mutable_data<float>()[i] = data[i];
+  }
+
+  // encoding fp32 -> fp16
+  OperatorDef def;
+  def.set_name("test");
+  def.set_type("FloatToHalf");
+  def.add_input("data");
+  def.add_output("data16");
+  unique_ptr<OperatorBase> op(CreateOperator(def, &ws));
+  EXPECT_NE(nullptr, op.get());
+  EXPECT_TRUE(op->Run());
+
+  // run some sanity checks
+  Blob* outputBlob = ws.GetBlob("data16");
+  EXPECT_NE(nullptr, outputBlob);
+  EXPECT_TRUE(outputBlob->IsType<Tensor>());
+  const TensorCPU& outputTensor = outputBlob->Get<Tensor>();
+  EXPECT_EQ(outputTensor.numel(), 5);
+  EXPECT_NO_THROW(outputTensor.data<at::Half>());
+
+  // decode fp16 -> fp32
+  OperatorDef def2;
+  def2.set_name("test");
+  def2.set_type("HalfToFloat");
+  def2.add_input("data16");
+  def2.add_output("result");
+  unique_ptr<OperatorBase> op2(CreateOperator(def2, &ws));
+  EXPECT_NE(nullptr, op2.get());
+  EXPECT_TRUE(op2->Run());
+
+  // validate result
+  Blob* resultBlob = ws.GetBlob("result");
+  EXPECT_NE(nullptr, resultBlob);
+  EXPECT_TRUE(resultBlob->IsType<Tensor>());
+  const TensorCPU& resultTensor = resultBlob->Get<Tensor>();
+  EXPECT_EQ(resultTensor.numel(), 5);
+
+  // data.size() is size_t; an int counter would trip -Wsign-compare.
+  for (size_t i = 0; i < data.size(); ++i) {
+    EXPECT_NEAR(resultTensor.data<float>()[i], data[i], 0.01);
+  }
+}
+
+// Fills a large fp16 tensor from U[-20, 20] and checks the sample mean
+// and second moment against the analytic values (0 and 40^2/12; with a
+// ~zero mean the second moment approximates the variance).
+TEST(Float16, UniformDistributionTest) {
+  Workspace ws;
+
+  OperatorDef def;
+  def.set_name("test");
+  def.set_type("Float16UniformFill");
+  int64_t size = 5000000L;
+  std::vector<int64_t> shape = {size, 32};
+  // int64_t (not long) keeps the element count 64-bit on LLP64 targets;
+  // size_t index avoids -Wsign-compare against shape.size().
+  int64_t tot_size = shape[0];
+  for (size_t i = 1; i < shape.size(); i++) {
+    tot_size *= shape[i];
+  }
+  caffe2::AddArgument<std::vector<int64_t>>("shape", shape, &def);
+  caffe2::AddArgument<float>("min", -20.0, &def);
+  caffe2::AddArgument<float>("max", 20.0, &def);
+  def.add_output("result");
+
+  unique_ptr<OperatorBase> op(CreateOperator(def, &ws));
+  EXPECT_NE(nullptr, op.get());
+  EXPECT_TRUE(op->Run());
+
+  Blob* resultBlob = ws.GetBlob("result");
+  const TensorCPU& resultTensor = resultBlob->Get<Tensor>();
+  EXPECT_EQ(resultTensor.numel(), tot_size);
+  double mean = 0.0, var = 0.0;
+  const at::Half* data = resultTensor.data<at::Half>();
+  // numel() is int64_t; match the index type.
+  for (int64_t i = 0; i < resultTensor.numel(); i++) {
+    float x = caffe2::convert::Get<float, at::Half>(data[i]);
+    mean += x;
+    var += x * x;
+  }
+  mean /= tot_size;
+  var /= tot_size;
+  LOG(INFO) << "m " << mean << " " << var;
+
+  // The uniform distribution of [-20,20] should have a mean of 0
+  // and a variance of 40^2/12. Use std::fabs from <cmath> rather than
+  // relying on an unqualified fabs pulled in transitively.
+  EXPECT_TRUE(std::fabs(mean) < 0.1);
+  EXPECT_TRUE(std::fabs(var - 133.33) / 133.33 < 0.1);
+}
+
+} // namespace caffe2
assert len(ops[0].output) == 1
assert ops[0].output[0] in ops[1].input
- @unittest.skipIf(not workspace.has_gpu_support, "No gpu support.")
def testHalfToFloatTypeInference(self):
input = self.new_record(schema.Scalar((np.float32, (32,))))