From f5e8b13f78a085bc95a1c0895e4a38ff6b87b375 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Sat, 26 May 2018 00:33:56 +0100
Subject: [PATCH] i915: Fix streaming loads for intel_tiled_memcpy

We stream from a tiled and aligned source into an unaligned user buffer,
so we need to use _mm_storeu_si128.

Fixes: d21c086d819d78fb3f6abcbb14aa492970f442aa (i965/tiled_memcpy: inline movntdqa loads in tiled_to_linear)
Reviewed-by: Kenneth Graunke
---
 src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index fac5427..6440dceac 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -223,17 +223,17 @@ _memcpy_streaming_load(void *dest, const void *src, size_t count)
 {
    if (count == 16) {
       __m128i val = _mm_stream_load_si128((__m128i *)src);
-      _mm_store_si128((__m128i *)dest, val);
+      _mm_storeu_si128((__m128i *)dest, val);
       return dest;
    } else if (count == 64) {
       __m128i val0 = _mm_stream_load_si128(((__m128i *)src) + 0);
       __m128i val1 = _mm_stream_load_si128(((__m128i *)src) + 1);
       __m128i val2 = _mm_stream_load_si128(((__m128i *)src) + 2);
       __m128i val3 = _mm_stream_load_si128(((__m128i *)src) + 3);
-      _mm_store_si128(((__m128i *)dest) + 0, val0);
-      _mm_store_si128(((__m128i *)dest) + 1, val1);
-      _mm_store_si128(((__m128i *)dest) + 2, val2);
-      _mm_store_si128(((__m128i *)dest) + 3, val3);
+      _mm_storeu_si128(((__m128i *)dest) + 0, val0);
+      _mm_storeu_si128(((__m128i *)dest) + 1, val1);
+      _mm_storeu_si128(((__m128i *)dest) + 2, val2);
+      _mm_storeu_si128(((__m128i *)dest) + 3, val3);
       return dest;
    } else {
       assert(count < 64); /* and (count < 16) for ytiled */
-- 
2.7.4