"Dump quantized input and weight tensors used in Conv and FC operators "
"during the first iteration");
// Makes the caffe2_dnnlowp_force_slow_path flag visible in this file. It is
// read below when computing packW and when choosing the `reason` string
// ("slow path enforced") for not taking a fast path.
+DECLARE_bool(caffe2_dnnlowp_force_slow_path);
+
namespace caffe2 {
using namespace std;
// packW is true only when every condition holds: NHWC storage order, an engine
// other than "DNNLOWP_ACC16", T == uint8_t, and AVX2 reported by GetCpuId().
// The added lines additionally require caffe2_dnnlowp_force_slow_path to be
// unset, so setting the flag turns packW off regardless of the other checks.
bool packW = ConvPoolOpBase<CPUContext>::order_ == StorageOrder::NHWC &&
OperatorBase::debug_def().engine() != "DNNLOWP_ACC16" &&
- is_same<T, uint8_t>::value && GetCpuId().avx2();
+ is_same<T, uint8_t>::value && GetCpuId().avx2() &&
+ !FLAGS_caffe2_dnnlowp_force_slow_path;
bool depthwise_3x3_fast_path = false, depthwise_3x3x3_fast_path = false;
if (TakeDepthWise3x3FastPath_()) {
OperatorBase::debug_def().engine() == "DNNLOWP_ACC16" ||
depthwise_3x3_fast_path) {
reason = "";
+ } else if (FLAGS_caffe2_dnnlowp_force_slow_path) {
+ reason = "slow path enforced";
} else {
assert(false);
}
dnnlowp_copy_to_32bit_frequency, 32,
"When outlier-aware quantization is used, this option specifies how often "
"we spill 16-bit accumulated numbers to 32-bit during the first pass");
// Defaults to false. The guard further down (AVX2 + FMA support,
// precision == 8, T == uint8_t) additionally requires this flag to be unset
// before entering the branch marked "fast path".
+DEFINE_bool(
+ caffe2_dnnlowp_force_slow_path,
+ false,
+ "When true, use slow path in quantization");
namespace dnnlowp {
bool avx2_support = cpuid.avx2();
bool fma_support = cpuid.fma();
if (avx2_support && fma_support && qparams.precision == 8 &&
- std::is_same<T, uint8_t>::value) {
+ std::is_same<T, uint8_t>::value &&
+ !FLAGS_caffe2_dnnlowp_force_slow_path) {
// fast path
constexpr int VLEN = 8;
std::size_t i = 0;