check_quantized_results_close,
generate_conv_inputs,
generate_convnd_inputs,
+ run_conv_or_fc,
)
from hypothesis import given
)
net.Proto().op.extend([relu_op])
- self.ws.create_blob("X").feed(X, device_option=gc)
- self.ws.create_blob("W").feed(W, device_option=gc)
- self.ws.create_blob("b").feed(b, device_option=gc)
- self.ws.run(init_net)
- self.ws.run(net)
- Y = self.ws.blobs["Y"].fetch()
- outputs.append(Output(Y=Y, op_type=op_type, engine=engine, order=order))
+ run_conv_or_fc(
+ self, init_net, net, X, W, b, op_type, engine, order, gc, outputs
+ )
check_quantized_results_close(outputs, symmetric=preserve_activation_sparsity)
)
net.Proto().op.extend([dequantize])
- self.ws.create_blob("X").feed(X, device_option=gc)
- self.ws.create_blob("W").feed(W, device_option=gc)
- self.ws.create_blob("b").feed(b, device_option=gc)
- self.ws.run(init_net)
- self.ws.run(net)
- Y = self.ws.blobs["Y"].fetch()
- outputs.append(Output(Y=Y, op_type=op_type, engine=engine, order=order))
+ run_conv_or_fc(
+ self, init_net, net, X, W, b, op_type, engine, order, gc, outputs
+ )
check_quantized_results_close(outputs, symmetric=preserve_activation_sparsity)
import numpy as np
from caffe2.python import core, dyndep, utils, workspace
from caffe2.quantization.server import utils as dnnlowp_utils
-from dnnlowp_test_utils import check_quantized_results_close
+from dnnlowp_test_utils import (
+ check_quantized_results_close,
+ run_conv_or_fc,
+)
from hypothesis import assume, given
)
net.Proto().op.extend([dequantize])
- self.ws.create_blob("X").feed(X, device_option=gc)
- self.ws.create_blob("W").feed(W, device_option=gc)
- self.ws.create_blob("b").feed(b, device_option=gc)
- self.ws.run(net)
- Y = self.ws.blobs["Y"].fetch()
- outputs.append(Output(Y=Y, op_type=op_type, engine=engine, order=order))
+ run_conv_or_fc(
+ self, None, net, X, W, b, op_type, engine, order, gc, outputs
+ )
check_quantized_results_close(outputs, symmetric=preserve_activation_sparsity)
)
net.Proto().op.extend([dequantize])
- self.ws.create_blob("X").feed(X, device_option=gc)
- self.ws.create_blob("W").feed(W, device_option=gc)
- self.ws.create_blob("b").feed(b, device_option=gc)
- self.ws.run(init_net)
- self.ws.run(net)
- Y = self.ws.blobs["Y"].fetch()
- outputs.append(Output(Y=Y, op_type=op_type, engine=engine, order=order))
+ run_conv_or_fc(
+ self, init_net, net, X, W, b, op_type, engine, order, gc, outputs
+ )
check_quantized_results_close(outputs, symmetric=preserve_activation_sparsity)
if (has_packed_bias) {
const auto& packed_filter =
this->template Input<Int8ConvDNNLowPPackedWeightBlob>(FILTER);
- b_quantized_ = packed_filter.bias;
- b_quantized_data_ = b_quantized_->data();
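+ // The packed filter blob already carries the bias quantized at pack time.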
+ b_quantized_data_ = packed_filter.bias->data();
} else {
const auto& bias = InputTensorCPU_(BIAS);
if (this->template InputIsType<int8::Int8TensorCPU>(BIAS)) {
if (this->order_ == StorageOrder::NHWC && in_qparams_[INPUT].zero_point &&
column_offsets_->empty()) {
if (b_quantized_->empty()) {
+ // When b_quantized_data_ comes from a pre-packed bias or an
+ // Int8TensorCPU, we can't modify it in place, so copy it into the
+ // internal b_quantized_ vector first.
b_quantized_->assign(b_quantized_data_, b_quantized_data_ + M);
b_quantized_data_ = b_quantized_->data();
}
check_quantized_results_close,
generate_conv_inputs,
generate_convnd_inputs,
+ run_conv_or_fc,
)
from hypothesis import assume, given
)
net.Proto().op.extend([quantize])
- x_q_param = dnnlowp_utils.choose_quantization_params(X.min(), X.max(), preserve_activation_sparsity) # noqa
+ x_q_param = dnnlowp_utils.choose_quantization_params(
+ X.min(), X.max(), preserve_activation_sparsity
+ )
if do_quantize_weight:
int8_given_tensor_fill, w_q_param = dnnlowp_utils.create_int8_given_tensor_fill(
W, "W_q", preserve_weight_sparsity
)
net.Proto().op.extend([dequantize])
- self.ws.create_blob("X").feed(X, device_option=gc)
- self.ws.create_blob("W").feed(W, device_option=gc)
- self.ws.create_blob("b").feed(b, device_option=gc)
- self.ws.run(init_net)
- self.ws.run(net)
- Y = self.ws.blobs["Y"].fetch()
- outputs.append(Output(Y=Y, op_type=op_type, engine=engine, order=order))
+ run_conv_or_fc(
+ self, init_net, net, X, W, b, op_type, engine, order, gc, outputs
+ )
check_quantized_results_close(outputs, symmetric=preserve_activation_sparsity)
)
net.Proto().op.extend([relu])
- self.ws.create_blob("X").feed(X, device_option=gc)
- self.ws.create_blob("W").feed(W, device_option=gc)
- self.ws.create_blob("b").feed(b, device_option=gc)
- self.ws.run(net)
- Y = self.ws.blobs["Y"].fetch()
- outputs.append(Output(Y=Y, op_type=op_type, engine=engine, order=order))
+ run_conv_or_fc(
+ self, None, net, X, W, b, op_type, engine, order, gc, outputs
+ )
check_quantized_results_close(outputs)
init_net = core.Net("test_init_net")
net = core.Net("test_net")
- fall_back_to_NCHW = "DNNLOWP" not in engine and order == "NHWC"
-
- if fall_back_to_NCHW:
- X_nchw = utils.NHWC2NCHW(X)
- W_nchw = utils.NHWC2NCHW(W)
-
do_quantize = "DNNLOWP" in engine
do_dequantize = "DNNLOWP" in engine
# If output scale/zp aren't set, they are computed from the ref fp32 op
kernels=kernels,
dilations=[dilation] * ndim,
pads=[pad] * (ndim * 2),
- order="NCHW" if fall_back_to_NCHW else order,
+ order=order,
dequantize_output=not do_dequantize,
engine=engine,
group=group,
)
net.Proto().op.extend([dequantize])
- self.ws.create_blob("X").feed(
- X_nchw if fall_back_to_NCHW else X, device_option=gc
- )
- self.ws.create_blob("W").feed(
- W_nchw if fall_back_to_NCHW else W, device_option=gc
+ run_conv_or_fc(
+ self, init_net, net, X, W, b, op_type, engine, order, gc, outputs
)
- self.ws.create_blob("b").feed(b, device_option=gc)
- self.ws.run(init_net)
- self.ws.run(net)
- Y = self.ws.blobs["Y"].fetch()
- if fall_back_to_NCHW:
- Y = utils.NCHW2NHWC(Y)
- outputs.append(Output(Y=Y, op_type=op_type, engine=engine, order=order))
check_quantized_results_close(outputs)
import numpy as np
from caffe2.python import core, dyndep, utils, workspace
from caffe2.quantization.server import utils as dnnlowp_utils
-from dnnlowp_test_utils import check_quantized_results_close
+from dnnlowp_test_utils import (
+ check_quantized_results_close,
+ run_conv_or_fc,
+)
from hypothesis import assume, given
)
net.Proto().op.extend([dequantize])
- self.ws.create_blob("X").feed(X, device_option=gc)
- self.ws.create_blob("W").feed(W, device_option=gc)
- self.ws.create_blob("b").feed(b, device_option=gc)
- self.ws.run(net)
- Y = self.ws.blobs["Y"].fetch()
- outputs.append(Output(Y=Y, op_type=op_type, engine=engine, order=order))
+ run_conv_or_fc(
+ self, None, net, X, W, b, op_type, engine, order, gc, outputs
+ )
check_quantized_results_close(outputs, symmetric=preserve_activation_sparsity)
)
net.Proto().op.extend([dequantize])
- self.ws.create_blob("X").feed(X, device_option=gc)
- self.ws.create_blob("W").feed(W, device_option=gc)
- self.ws.create_blob("b").feed(b, device_option=gc)
- self.ws.run(init_net)
- self.ws.run(net)
- Y = self.ws.blobs["Y"].fetch()
- outputs.append(Output(Y=Y, op_type=op_type, engine=engine, order=order))
+ run_conv_or_fc(
+ self, init_net, net, X, W, b, op_type, engine, order, gc, outputs
+ )
check_quantized_results_close(outputs)
from caffe2.python import core, dyndep, workspace
from caffe2.python.fb import hardcode_scale_zp
from caffe2.quantization.server import utils as dnnlowp_utils
-from dnnlowp_test_utils import check_quantized_results_close, generate_conv_inputs
+from dnnlowp_test_utils import (
+ check_quantized_results_close,
+ generate_conv_inputs,
+ run_conv_or_fc,
+)
from hypothesis import assume, given
)
net.Proto().op.extend([dequantize])
- self.ws.create_blob("X").feed(X, device_option=gc)
- self.ws.create_blob("W").feed(W, device_option=gc)
- self.ws.create_blob("b").feed(b, device_option=gc)
- self.ws.run(init_net)
- self.ws.run(net)
- Y = self.ws.blobs["Y"].fetch()
- outputs.append(Output(Y=Y, op_type=op_type, engine=engine, order=order))
+ run_conv_or_fc(
+ self, init_net, net, X, W, b, op_type, engine, order, gc, outputs
+ )
check_quantized_results_close(outputs, symmetric=preserve_activation_sparsity)
)
net.Proto().op.extend([relu])
- self.ws.create_blob("X").feed(X, device_option=gc)
- self.ws.create_blob("W").feed(W, device_option=gc)
- self.ws.create_blob("b").feed(b, device_option=gc)
- self.ws.run(net)
- Y = self.ws.blobs["Y"].fetch()
- outputs.append(Output(Y=Y, op_type=op_type, engine=engine, order=order))
+ run_conv_or_fc(
+ self, None, net, X, W, b, op_type, engine, order, gc, outputs
+ )
check_quantized_results_close(outputs)
from __future__ import absolute_import, division, print_function, unicode_literals
+import collections
+
import numpy as np
-from caffe2.python import utils
+from caffe2.python import utils, workspace
from hypothesis import assume
preserve_activation_sparsity,
preserve_weight_sparsity,
)
+
+
+def run_conv_or_fc(
+ test_case, init_net, net, X, W, b, op_type, engine, order, gc, outputs
+):
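+ """Feed X, W, and b, run net, and append Output namedtuples to outputs.
+
+ The net runs through test_case.ws (after init_net, if given): once
+ for the reference (empty) engine, twice otherwise so both the caching
+ first run and the cached subsequent runs are exercised. Non-empty
+ engines also run via workspace.CreateNet/RunNet, which reuses the
+ same operator instances. Pass order=None for FC, which drops the
+ order field from the Output namedtuple.
+ """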
+ if order:
+ # Conv
+ Output = collections.namedtuple("Output", ["Y", "op_type", "engine", "order"])
+ else:
+ # FC
+ Output = collections.namedtuple("Output", ["Y", "op_type", "engine"])
+
+ # We run DNNLOWP ops multiple times to test their first runs, which
+ # populate caches and hence exercise different code paths from the
+ # subsequent runs.
+
+ # self.ws.run re-creates the operators every time, so this covers the
+ # case where multiple nets share the same workspace.
+ test_case.ws.create_blob("X").feed(X, device_option=gc)
+ test_case.ws.create_blob("W").feed(W, device_option=gc)
+ test_case.ws.create_blob("b").feed(b, device_option=gc)
+ if init_net:
+ test_case.ws.run(init_net)
+ for i in range(1 if engine == "" else 2):
+ test_case.ws.run(net)
+ Y = test_case.ws.blobs["Y"].fetch()
+ if order:
+ outputs.append(Output(Y=Y, op_type=op_type, engine=engine, order=order))
+ else:
+ outputs.append(Output(Y=Y, op_type=op_type, engine=engine))
+
+ # workspace.CreateNet + workspace.RunNet reuse the same operator instances
+ if engine != "":
+ workspace.FeedBlob("X", X)
+ workspace.FeedBlob("W", W)
+ workspace.FeedBlob("b", b)
+ if init_net:
+ workspace.RunNetOnce(init_net)
+ workspace.CreateNet(net)
+ for i in range(2):
+ workspace.RunNet(net)
+ Y = workspace.FetchBlob("Y")
+ if order:
+ outputs.append(Output(Y=Y, op_type=op_type, engine=engine, order=order))
+ else:
+ outputs.append(Output(Y=Y, op_type=op_type, engine=engine))
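+
+
+# Illustrative call site (a sketch mirroring the conv/FC tests updated
+# alongside this helper; outputs accumulates results across engines for
+# check_quantized_results_close):
+#
+#     run_conv_or_fc(
+#         self, init_net, net, X, W, b, op_type, engine, order, gc, outputs
+#     )
+#     check_quantized_results_close(outputs)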
import numpy as np
from caffe2.python import core, dyndep, workspace
from caffe2.quantization.server import utils as dnnlowp_utils
-from dnnlowp_test_utils import check_quantized_results_close
+from dnnlowp_test_utils import check_quantized_results_close, run_conv_or_fc
from hypothesis import given
)
net.Proto().op.extend([dequantize])
- self.ws.create_blob("X").feed(X, device_option=gc)
- self.ws.create_blob("W").feed(W, device_option=gc)
- self.ws.create_blob("b").feed(b, device_option=gc)
- self.ws.run(net)
- outputs.append(
- Output(Y=self.ws.blobs["Y"].fetch(), op_type=op_type, engine=engine)
+ run_conv_or_fc(
+ self, None, net, X, W, b, op_type, engine, None, gc, outputs
)
check_quantized_results_close(outputs)
)
net.Proto().op.extend([dequantize])
- self.ws.create_blob("X").feed(X, device_option=gc)
- self.ws.create_blob("W").feed(W, device_option=gc)
- self.ws.create_blob("b").feed(b, device_option=gc)
- self.ws.run(init_net)
- self.ws.run(net)
- outputs.append(
- Output(Y=self.ws.blobs["Y"].fetch(), op_type=op_type, engine=engine)
+ run_conv_or_fc(
+ self, init_net, net, X, W, b, op_type, engine, None, gc, outputs
)
check_quantized_results_close(outputs)
const auto& packed_filter =
this->template Input<Int8FCDNNLowPPackedWeightBlob>(1);
CAFFE_ENFORCE(!dequantize_output_);
- b_quantized_ = packed_filter.bias;
- b_quantized_data_ = b_quantized_->data();
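+ // The packed filter blob already carries the bias quantized at pack time.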
+ b_quantized_data_ = packed_filter.bias->data();
} else {
const auto& bias = InputTensorCPU_(2);
if (this->template InputIsType<int8::Int8TensorCPU>(2)) {
if (in_qparams_[0].zero_point && column_offsets_->empty() &&
b_quantized_data_) {
if (b_quantized_->empty()) {
+ // When b_quantized_data_ comes from a pre-packed bias or an
+ // Int8TensorCPU, we can't modify it in place, so copy it into the
+ // internal b_quantized_ vector first.
b_quantized_->assign(b_quantized_data_, b_quantized_data_ + N);
b_quantized_data_ = b_quantized_->data();
}
from dnnlowp_test_utils import (
avoid_vpmaddubsw_overflow_fc,
check_quantized_results_close,
+ run_conv_or_fc,
)
from hypothesis import given
)
net.Proto().op.extend([dequantize])
- self.ws.create_blob("X").feed(X, device_option=gc)
- self.ws.create_blob("W").feed(W, device_option=gc)
- self.ws.create_blob("b").feed(b, device_option=gc)
- self.ws.run(init_net)
- self.ws.run(net)
- outputs.append(
- Output(Y=self.ws.blobs["Y"].fetch(), op_type=op_type, engine=engine)
+ run_conv_or_fc(
+ self, init_net, net, X, W, b, op_type, engine, None, gc, outputs
)
check_quantized_results_close(outputs, symmetric=preserve_activation_sparsity)
from dnnlowp_test_utils import (
avoid_vpmaddubsw_overflow_fc,
check_quantized_results_close,
+ run_conv_or_fc,
)
from hypothesis import given
)
net.Proto().op.extend([dequantize])
- self.ws.create_blob("X").feed(X, device_option=gc)
- self.ws.create_blob("W").feed(W, device_option=gc)
- self.ws.create_blob("b").feed(b, device_option=gc)
- self.ws.run(init_net)
- self.ws.run(net)
- outputs.append(
- Output(Y=self.ws.blobs["Y"].fetch(), op_type=op_type, engine=engine)
+ run_conv_or_fc(
+ self, init_net, net, X, W, b, op_type, engine, None, gc, outputs
)
check_quantized_results_close(outputs)