src/gui/painting/qdrawhelper_arm_simd.cpp

   1 /****************************************************************************
   2 **
   3 ** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
   4 ** All rights reserved.
   5 ** Contact: Nokia Corporation (qt-info@nokia.com)
   6 **
   7 ** This file is part of the QtGui module of the Qt Toolkit.
   8 **
   9 ** $QT_BEGIN_LICENSE:LGPL$
  10 ** GNU Lesser General Public License Usage
  11 ** This file may be used under the terms of the GNU Lesser General Public
  12 ** License version 2.1 as published by the Free Software Foundation and
  13 ** appearing in the file LICENSE.LGPL included in the packaging of this
  14 ** file. Please review the following information to ensure the GNU Lesser
  15 ** General Public License version 2.1 requirements will be met:
  16 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
  17 **
  18 ** In addition, as a special exception, Nokia gives you certain additional
  19 ** rights. These rights are described in the Nokia Qt LGPL Exception
  20 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
  21 **
  22 ** GNU General Public License Usage
  23 ** Alternatively, this file may be used under the terms of the GNU General
  24 ** Public License version 3.0 as published by the Free Software Foundation
  25 ** and appearing in the file LICENSE.GPL included in the packaging of this
  26 ** file. Please review the following information to ensure the GNU General
  27 ** Public License version 3.0 requirements will be met:
  28 ** http://www.gnu.org/copyleft/gpl.html.
  29 **
  30 ** Other Usage
  31 ** Alternatively, this file may be used in accordance with the terms and
  32 ** conditions contained in a signed written agreement between you and Nokia.
  33 **
  34 **
  35 **
  36 **
  37 **
  38 ** $QT_END_LICENSE$
  39 **
  40 ****************************************************************************/
  41
  42 #include "qdrawhelper_arm_simd_p.h"
  43
  44 #include <private/qpaintengine_raster_p.h>
  45 #include <private/qblendfunctions_p.h>
  46
  47 #ifdef QT_HAVE_ARM_SIMD
  48
  49 #if defined(Q_OS_SYMBIAN)
  50 #if !defined(__SWITCH_TO_ARM)
  51 #ifdef __MARM_THUMB__
  52 #ifndef __ARMCC__
  53 #define __SWITCH_TO_ARM      asm("push {r0} ");\
  54                              asm("add r0, pc, #4 ");\
  55                              asm("bx r0 ");\
  56                              asm("nop ");\
  57                              asm(".align 2 ");\
  58                              asm(".code 32 ");\
  59                              asm("ldr r0, [sp], #4 ")
  60 #define __END_ARM            asm(".code 16 ")
  61 #else
  62 #define __SWITCH_TO_ARM      asm(".code 32 ");
  63 #define __END_ARM
  64 #endif // __ARMCC__
  65 #else
  66 #define __SWITCH_TO_ARM
  67 #define __END_ARM
  68 #endif //__MARM_THUMB__
  69 #endif
  70 #endif
  71
  72 #if defined(Q_OS_SYMBIAN) && defined(Q_CC_RVCT)
  73 __asm void qt_blend_argb32_on_argb32_arm_simd(uchar *destPixels, int dbpl,
  74                                         const uchar *srcPixels, int sbpl,
  75                                         int w, int h,
  76                                         int const_alpha)
  77 {
  78 #ifndef __ARMCC__
  79     __SWITCH_TO_ARM;
  80 #else
  81     CODE32
  82 #endif // __ARMCC__
  83
  84     stmfd   sp!, {r4-r12, r14}
  85
  86     // read arguments off the stack
  87     add     r8, sp, #10 * 4
  88     ldmia   r8, {r4-r6}
  89
  90     // adjust dbpl and sbpl
  91     mov     r14, #4
  92     mul     r14, r4, r14
  93     sub     r1, r1, r14
  94     sub     r3, r3, r14
  95
  96     // load 0xFF00FF00 to r12
  97     mov     r12, #0xFF000000
  98     add     r12, r12, #0xFF00
  99
 100     // load 0x800080 to r14
 101     mov     r14, #0x800000
 102     add     r14, r14, #0x80
 103
 104     /*
 105       Registers:
 106        r0 dst
 107        r1 dbpl
 108        r2 src
 109        r3 sbpl
 110        r4 w
 111        r5 h
 112        r6 const_alpha
 113        r12 0xFF0000
 114        r14 0x800080
 115     */
 116
 117     cmp     r6, #256 //test if we have fully opaque constant alpha value
 118     bne     argb32constalpha // branch if not
 119
 120 argb32_next_row
 121
 122     mov     r7, r4
 123
 124 argb32_next_pixel
 125
 126     ldr     r8, [r2], #4 // load src pixel
 127
 128     // Negate r8 and extract src alpha
 129     mvn     r11, r8 // bitwise not
 130     uxtb    r11, r11, ror #24
 131
 132     cmp     r11, #0 // test for full src opacity (negated)
 133     beq     argb32_no_blend
 134
 135     cmp     r11, #255 // test for full src transparency (negated)
 136     addeq   r0, #4
 137     beq     argb32_nop
 138
 139     ldr     r9, [r0] // load dst pixel
 140
 141     // blend
 142     uxtb16  r10, r9
 143     uxtb16  r6, r9, ror #8
 144     mla     r10, r11, r10, r14
 145     mla     r9, r6, r11, r14
 146     uxtab16 r10, r10, r10, ror #8
 147     uxtab16 r9, r9, r9, ror #8
 148     and     r9, r9, r12
 149     uxtab16 r10, r9, r10, ror #8
 150
 151     uqadd8  r8, r10, r8
 152
 153 argb32_no_blend
 154
 155     str     r8, [r0], #4
 156
 157 argb32_nop
 158
 159     subs    r7, r7, #1
 160     bgt     argb32_next_pixel
 161
 162     add     r0, r0, r1 // dest = dest + dbpl
 163     add     r2, r2, r3 // src = src + sbpl
 164
 165     subs    r5, r5, #1
 166     bgt     argb32_next_row
 167
 168     b       argb32_blend_exit
 169
 170 argb32constalpha
 171
 172     cmp     r6, #0
 173     beq     argb32_blend_exit
 174
 175     ; const_alpha = (const_alpha * 255) >> 8;
 176     mov     r11, #255
 177     mul     r6, r6, r11
 178     mov     r11, r6, lsr #8
 179
 180 argb32constalpha_next_row
 181
 182     mov     r7, r4
 183
 184 argb32constalpha_next_pixel
 185
 186     ldr     r9, [r2], #4 // load src pixel
 187
 188     // blend
 189     uxtb16  r10, r9
 190     uxtb16  r6, r9, ror #8
 191     mla     r10, r11, r10, r14
 192     mla     r9, r6, r11, r14
 193     uxtab16 r10, r10, r10, ror #8
 194     uxtab16 r9, r9, r9, ror #8
 195     and     r9, r9, r12
 196     uxtab16 r8, r9, r10, ror #8
 197
 198     ldr     r9, [r0] // load dst pixel
 199
 200     // blend
 201     uxtb16  r10, r9
 202     uxtb16  r6, r9, ror #8
 203
 204     // Negate r8 and extract src alpha
 205     mvn     r9, r8 // bitwise not
 206     uxtb    r9, r9, ror #24
 207
 208     mla     r10, r9, r10, r14
 209     mla     r9, r6, r9, r14
 210     uxtab16 r10, r10, r10, ror #8
 211     uxtab16 r9, r9, r9, ror #8
 212     and     r9, r9, r12
 213     uxtab16 r10, r9, r10, ror #8
 214
 215     uqadd8  r8, r10, r8
 216
 217     str     r8, [r0], #4
 218
 219     subs    r7, r7, #1
 220     bgt     argb32constalpha_next_pixel
 221
 222     add     r0, r0, r1 // dest = dest + dbpl
 223     add     r2, r2, r3 // src = src + sbpl
 224
 225     subs    r5, r5, #1
 226     bgt     argb32constalpha_next_row
 227
 228 argb32_blend_exit
 229
 230     // Restore registers
 231     ldmfd   sp!, {r4-r12, lr}
 232     bx      lr
 233
 234     __END_ARM
 235 }
 236
 237 void qt_blend_rgb32_on_rgb32_arm_simd(uchar *destPixels, int dbpl,
 238                              const uchar *srcPixels, int sbpl,
 239                              int w, int h,
 240                              int const_alpha)
 241 {
 242     if (const_alpha != 256) {
 243         qt_blend_argb32_on_argb32_arm_simd(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha);
 244         return;
 245     }
 246
 247     const uint *src = (const uint *) srcPixels;
 248     uint *dst = (uint *) destPixels;
 249     if (w <= 64) {
 250         for (int y=0; y<h; ++y) {
 251             qt_memconvert(dst, src, w);
 252             dst = (quint32 *)(((uchar *) dst) + dbpl);
 253             src = (const quint32 *)(((const uchar *) src) + sbpl);
 254         }
 255     } else {
 256         int len = w * 4;
 257         for (int y=0; y<h; ++y) {
 258             memcpy(dst, src, len);
 259             dst = (quint32 *)(((uchar *) dst) + dbpl);
 260             src = (const quint32 *)(((const uchar *) src) + sbpl);
 261         }
 262     }
 263 }
 264
 265 #else // defined(Q_OS_SYMBIAN) && defined(Q_CC_RVCT)
 266
 267 // TODO: add GNU assembler instructions and support for other platforms.
 268 //       Default to C code for now
 269
 270 void qt_blend_argb32_on_argb32_arm_simd(uchar *destPixels, int dbpl,
 271                                         const uchar *srcPixels, int sbpl,
 272                                         int w, int h,
 273                                         int const_alpha)
 274 {
 275     const uint *src = (const uint *) srcPixels;
 276     uint *dst = (uint *) destPixels;
 277     if (const_alpha == 256) {
 278         for (int y=0; y<h; ++y) {
 279             for (int x=0; x<w; ++x) {
 280                 uint s = src[x];
 281                 if (s >= 0xff000000)
 282                     dst[x] = s;
 283                 else if (s != 0)
 284                     dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s));
 285             }
 286             dst = (quint32 *)(((uchar *) dst) + dbpl);
 287             src = (const quint32 *)(((const uchar *) src) + sbpl);
 288         }
 289     } else if (const_alpha != 0) {
 290         const_alpha = (const_alpha * 255) >> 8;
 291         for (int y=0; y<h; ++y) {
 292             for (int x=0; x<w; ++x) {
 293                 uint s = BYTE_MUL(src[x], const_alpha);
 294                 dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s));
 295             }
 296             dst = (quint32 *)(((uchar *) dst) + dbpl);
 297             src = (const quint32 *)(((const uchar *) src) + sbpl);
 298         }
 299     }
 300 }
 301
 302 void qt_blend_rgb32_on_rgb32_arm_simd(uchar *destPixels, int dbpl,
 303                              const uchar *srcPixels, int sbpl,
 304                              int w, int h,
 305                              int const_alpha)
 306 {
 307     if (const_alpha != 256) {
 308         qt_blend_argb32_on_argb32_arm_simd(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha);
 309         return;
 310     }
 311
 312     const uint *src = (const uint *) srcPixels;
 313     uint *dst = (uint *) destPixels;
 314     if (w <= 64) {
 315         for (int y=0; y<h; ++y) {
 316             qt_memconvert(dst, src, w);
 317             dst = (quint32 *)(((uchar *) dst) + dbpl);
 318             src = (const quint32 *)(((const uchar *) src) + sbpl);
 319         }
 320     } else {
 321         int len = w * 4;
 322         for (int y=0; y<h; ++y) {
 323             memcpy(dst, src, len);
 324             dst = (quint32 *)(((uchar *) dst) + dbpl);
 325             src = (const quint32 *)(((const uchar *) src) + sbpl);
 326         }
 327     }
 328 }
 329
 330 #endif
 331
 332 #endif // QT_HAVE_ARM_SIMD