From 99d59d4395d86bb27f385a4d983de461f6acef84 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Andre Date: Tue, 22 Mar 2016 13:49:00 +0900 Subject: [PATCH] evas: NEON scaling up fixed Summary: The previous implementation loaded data from memory first and only then checked the borders. This change checks the borders first, as the C implementation does, which prevents reads of inaccessible memory. Reviewers: cedric, jypark, Hermet, jiin.moon, jpeg Reviewed By: jpeg Projects: #efl Differential Revision: https://phab.enlightenment.org/D3809 Change-Id: Ib369af395669b91cf243acef2a32890962365d02 --- src/lib/evas/common/evas_scale_smooth_scaler_up.c | 37 +++++++++++++---------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/src/lib/evas/common/evas_scale_smooth_scaler_up.c b/src/lib/evas/common/evas_scale_smooth_scaler_up.c index 5ba7805..f059e8f 100644 --- a/src/lib/evas/common/evas_scale_smooth_scaler_up.c +++ b/src/lib/evas/common/evas_scale_smooth_scaler_up.c @@ -234,6 +234,7 @@ uint32x2x2_t vp0, vp1; uint16x8_t vax; uint16x8_t vax1; + DATA32 pa[2][4]; #else DATA32 p0, p1, p2, p3; #endif @@ -242,30 +243,34 @@ ax = 1 + ((sxx - (sx << 16)) >> 8); p = psrc + sx; q = p + src_w; #ifdef SCALE_USING_NEON - vax = vdupq_n_u16(ax); - vp0.val[0] = vld1_u32(p); - vp0.val[1] = vld1_u32(q); - if ((sx + 1) >= srw) + pa[0][0] = pa[0][1] = pa[0][2] = pa[0][3] = *p; + if ((sx + 1) < srw) + pa[0][1] = *(p + 1); + if ((sy + 1) < srh) { - vp0.val[0] = vdup_lane_u32(vp0.val[0], 0); // p0, p1 - vp0.val[1] = vdup_lane_u32(vp0.val[1], 0); // p2, p3 + pa[0][2] = *q; pa[0][3] = pa[0][2]; + if ((sx + 1) < srw) + pa[0][3] = *(q + 1); } - if ((sy + 1) >= srh) - vp0.val[1] = vdup_lane_u32(vp0.val[0], 0); + vax = vdupq_n_u16(ax); + vp0.val[0] = vld1_u32(&pa[0][0]); + vp0.val[1] = vld1_u32(&pa[0][2]); sxx += dsxx; sx = sxx >> 16; ax1 = 1 + ((sxx - (sx << 16)) >> 8); - vax1 = vdupq_n_u16(ax1); p1 = psrc + sx; q1 = p1 + src_w; - vp1.val[0] = vld1_u32(p1); - vp1.val[1] = vld1_u32(q1); - if ((sx + 1) >= srw) 
+ pa[1][0] = pa[1][1] = pa[1][2] = pa[1][3] = *p1; + if ((sx + 1) < srw) + pa[1][1] = *(p1 + 1); + if ((sy + 1) < srh) { - vp1.val[0] = vdup_lane_u32(vp1.val[0], 0); // p4, p5 - vp1.val[1] = vdup_lane_u32(vp1.val[1], 0); // p6, p7 + pa[1][2] = *q1; pa[1][3] = pa[1][2]; + if ((sx + 1) < srw) + pa[1][3] = *(q1 + 1); } - if ((sy + 1) >= srh) - vp1.val[1] = vdup_lane_u32(vp1.val[0], 0); + vax1 = vdupq_n_u16(ax1); + vp1.val[0] = vld1_u32(&pa[1][0]); + vp1.val[1] = vld1_u32(&pa[1][2]); #else p0 = p1 = p2 = p3 = *p; if ((sx + 1) < srw) -- 2.7.4