From 18a6781f5758aed7f910a49e62f65f20f18c8f81 Mon Sep 17 00:00:00 2001
From: Jianyu Huang
Date: Fri, 22 Mar 2019 12:28:04 -0700
Subject: [PATCH] Fix alignment issues for Fake BFP16 fp32 -> bfp16 rounding routines (#18321)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/18321

As title: _mm256_store_si256 requires a 32-byte-aligned destination, so the alignas(8) scratch buffer in the scalar tail of fp32_to_bfp16_round is under-aligned; alignas(64) satisfies the requirement with room to spare.

Reviewed By: jspark1105

Differential Revision: D14575512

fbshipit-source-id: 0e33cdab54b1aef8b67f0b4c366692c5dbdf631d
---
 caffe2/quantization/server/fully_connected_fake_lowp_op_avx2.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/caffe2/quantization/server/fully_connected_fake_lowp_op_avx2.cc b/caffe2/quantization/server/fully_connected_fake_lowp_op_avx2.cc
index c8ca69e..414bfe2 100644
--- a/caffe2/quantization/server/fully_connected_fake_lowp_op_avx2.cc
+++ b/caffe2/quantization/server/fully_connected_fake_lowp_op_avx2.cc
@@ -93,7 +93,7 @@ void fp32_to_bfp16_round(const float* source, size_t size, float* dest) {
         reinterpret_cast<__m256i*>(&dest[i]), _mm256_and_si256(wmask, v32int));
   }
   for (auto i = (size / 8) * 8; i < size; i++) {
-    alignas(8) float tmp[8];
+    alignas(64) float tmp[8];
     __m256i v32int = _mm256_add_epi32(
         _mm256_set1_epi32(*reinterpret_cast<const int*>(&source[i])), woffset);
     _mm256_store_si256(
-- 
2.7.4
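
Editor's note (not part of the patch): a minimal, self-contained sketch of the scalar tail handling this hunk touches, illustrating why the scratch buffer needs stronger alignment. fp32_to_bfp16_round_one is a hypothetical helper; the constants woffset = 0x00008000 and wmask = 0xffff0000 are assumptions (their definitions sit outside the hunk) chosen as the usual add-half-then-truncate constants for bf16-style rounding.

#include <immintrin.h>
#include <cstdint>
#include <cstring>
#include <cstdio>

// Round one fp32 value to bfp16 precision the way the patched tail loop does:
// add 0x8000 to the integer bit pattern (round half up into the kept bits),
// then mask off the low 16 mantissa bits. Constants are assumed, see note above.
static float fp32_to_bfp16_round_one(float x) {
  const __m256i woffset = _mm256_set1_epi32(0x00008000);
  const __m256i wmask = _mm256_set1_epi32(0xffff0000);
  // _mm256_store_si256 needs a 32-byte-aligned destination; alignas(8) does
  // not guarantee that, alignas(64) (a full cache line) does.
  alignas(64) float tmp[8];
  int32_t bits;
  std::memcpy(&bits, &x, sizeof(bits)); // type-pun the float without UB
  __m256i v32int = _mm256_add_epi32(_mm256_set1_epi32(bits), woffset);
  _mm256_store_si256(
      reinterpret_cast<__m256i*>(tmp), _mm256_and_si256(wmask, v32int));
  return tmp[0];
}

int main() {
  // Compile with -mavx2. With the assumed constants, 1 + 2^-10 rounds back
  // to 1.0, while 1 + 2^-6 fits in the 7 kept mantissa bits and is preserved.
  std::printf("%.10f -> %.10f\n", 1.0009765625f, fp32_to_bfp16_round_one(1.0009765625f));
  std::printf("%.10f -> %.10f\n", 1.015625f, fp32_to_bfp16_round_one(1.015625f));
  return 0;
}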