nbits_in_non_outlier_ < 8) {
CAFFE_ENFORCE(!W_quantized_.empty());
- Wq_outlier_.reset(ExtractOutlierMatrix(
+ int outlier_cnt = CountOutliers(
group_,
kernel_dim,
num_out_channels,
nbits_in_non_outlier_,
- W_quantized_));
- int outlier_cnt = Wq_outlier_->ColPtr()[num_out_channels];
+ W_quantized_);
C10_LOG_FIRST_N(INFO, 10)
<< "Proportion of outlier for Conv layer with weight blob "
// We need to call GetQuantizationParameters_ again to pack for acc32
return BaseType::GetQuantizationParameters_();
}
+
+ Wq_outlier_.reset(ExtractOutlierMatrix(
+ group_,
+ kernel_dim,
+ num_out_channels,
+ nbits_in_non_outlier_,
+ W_quantized_));
}
bool packW = this->order_ == StorageOrder::NHWC && GetCpuId().avx2();
const vector<TensorQuantizationParams>& qparams,
vector<int32_t>& col_offsets);
-fbgemm::CompressedSparseColumn* ExtractOutlierMatrix(
+int CountOutliers(
int groups,
int kernel_dim,
int M,
}
}
}
+ return outlier_cnt;
+}
+
+fbgemm::CompressedSparseColumn* ExtractOutlierMatrix(
+ int groups,
+ int kernel_dim,
+ int M,
+ int nbits_in_non_outlier,
+ vector<int8_t>& W_quantized) {
+ int outlier_cnt =
+ CountOutliers(groups, kernel_dim, M, nbits_in_non_outlier, W_quantized);
fbgemm::CompressedSparseColumn* Wq_outlier =
new fbgemm::CompressedSparseColumn(kernel_dim, M);
}
}
} // for each group
+ CAFFE_ENFORCE_EQ(outlier_cnt, Wq_outlier->RowIdx().size());
Wq_outlier->ColPtr()[M] = outlier_cnt;
return Wq_outlier;
if (this->debug_def().engine() == "DNNLOWP_ACC16" &&
!fallback_to_32_bit_accumulation) {
if (nbits_in_non_outlier_ < 8) {
- Y->W_outlier.reset(ExtractOutlierMatrix(
- group_, kernel_dim, M, nbits_in_non_outlier_, W_quantized));
- int outlier_cnt = Y->W_outlier->ColPtr()[M];
+ int outlier_cnt = CountOutliers(
+ group_, kernel_dim, M, nbits_in_non_outlier_, W_quantized);
LOG(INFO) << "Proportion of outlier for Conv layer with weight blob "
<< this->debug_def().input(0) << " is "
<< FLAGS_caffe2_dnnlowp_acc16_density_threshold
<< " . Falling back to acc32";
fallback_to_32_bit_accumulation = true;
+ } else {
+ Y->W_outlier.reset(ExtractOutlierMatrix(
+ group_, kernel_dim, M, nbits_in_non_outlier_, W_quantized));
}
}