src/media/base/simd/convert_rgb_to_yuv_ssse3.inc

   1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved.
   2 ; Use of this source code is governed by a BSD-style license that can be
   3 ; found in the LICENSE file.
   4
   5 ;
   6 ; void SYMBOL(const uint8* argb, uint8* y, uint8* u, uint8* v, int width);
   7 ;
   8 ; The main code that converts RGB pixels to YUV pixels. This function roughly
   9 ; consists of three parts: converting one ARGB pixel to YUV pixels, converting
  10 ; two ARGB pixels to YUV pixels, and converting four ARGB pixels to YUV pixels.
  11 ; Written in C, the structure of this function would look like the snippet
  12 ; below.
  13 ;
  14 ;   if (width & 1) {
  15 ;     --width;
  16 ;     // Convert one ARGB pixel to one Y pixel, one U pixel, and one V pixel.
  17 ;   }
  18 ;
  19 ;   if (width & 2) {
  20 ;     width -= 2;
  21 ;     // Convert two ARGB pixels to two Y pixels, one U pixel, and one V pixel.
  22 ;   }
  23 ;
  24 ;   while (width) {
  25 ;     width -= 4;
  26 ;     // Convert four ARGB pixels to four Y pixels, two U pixels, and two V
  27 ;     // pixels.
  28 ;   }
  29 ;
  EXPORT    SYMBOL
  align     function_align

; Converts one row of ARGB (or RGB) pixels to Y, U, and V samples.
;
; Register names used below are the ones bound by the PROLOGUE line:
;   ARGB  - source pixel row
;   Y     - destination Y row
;   U, V  - destination U and V rows (U may be NULL; see the checks below)
;   WIDTH - row width in pixels on entry; halved by the first instruction so
;           that it indexes u[] and v[] directly (y[] uses WIDTH * 2)
;   TEMP  - scratch register
; The row is processed from right to left: an odd trailing pixel first, then a
; trailing pair, then groups of four until WIDTH reaches zero.
;
; NOTE(review): PROLOGUE, LOAD_XMM, READ_ARGB, CALC_Y, CALC_UV, and INIT_UV are
; macros supplied by the file that includes this one; their exact expansion is
; not visible here.
mangle(SYMBOL):
  %assign stack_offset 0
  PROLOGUE 5, 6, 8, ARGB, Y, U, V, WIDTH, TEMP

  ; Initialize constants used in this function. (We use immediates to avoid
  ; dependency onto GOT.)
  LOAD_XMM  XMM_CONST_Y0, 0x00420219
  LOAD_XMM  XMM_CONST_Y1, 0x00007F00
  LOAD_XMM  XMM_CONST_U, 0x00DAB670
  LOAD_XMM  XMM_CONST_V, 0x0070A2EE
  LOAD_XMM  XMM_CONST_128, 0x00800080

.convert_one_pixel:
  ; Divide the input width by two so it represents the offsets for u[] and v[].
  ; The bit shifted out by sar lands in the carry flag, so the carry is set
  ; exactly when the width is odd. In that case we read the rightmost ARGB
  ; pixel and convert its colorspace to YUV. This code stores one Y pixel, one
  ; U pixel, and one V pixel.
  sar       WIDTHq, 1
  jnc       .convert_two_pixels

  ; Read one ARGB (or RGB) pixel.
  READ_ARGB xmm0, 1

  ; Calculate y[0] from one RGB pixel read above.
  CALC_Y    xmm1, xmm0
  movd      TEMPd, xmm1
  mov       BYTE [Yq + WIDTHq * 2], TEMPb

  ; Skip calculating u and v if the output buffer is NULL, exactly as the
  ; two-pixel path below does. Without this check the byte stores below would
  ; write through a NULL-based address.
  test      Uq, Uq
  jz        .convert_two_pixels

  ; Calculate u[0] from one RGB pixel read above. If this is an odd line, the
  ; output pixel contains the U value calculated in the previous call. We also
  ; read this pixel and calculate their average.
  ; NOTE(review): the last INIT_UV argument is assumed to be the number of
  ; pixels being converted, as in the two- and four-pixel paths and as in
  ; READ_ARGB above; only one U byte is produced here. Confirm against the
  ; macro definition.
  INIT_UV   TEMPd, Uq, 1
  CALC_UV   xmm1, xmm0, XMM_CONST_U, TEMPd
  movd      TEMPd, xmm1
  mov       BYTE [Uq + WIDTHq], TEMPb

  ; Calculate v[0] from one RGB pixel. Same as u[0], we read the result of the
  ; previous call and get their average. The previous value must come from the
  ; V buffer, not the U buffer.
  INIT_UV   TEMPd, Vq, 1
  CALC_UV   xmm1, xmm0, XMM_CONST_V, TEMPd
  movd      TEMPd, xmm1
  mov       BYTE [Vq + WIDTHq], TEMPb

.convert_two_pixels:
  ; If the input width is not a multiple of four, read the rightmost two ARGB
  ; pixels and convert their colorspace to YUV. This code stores two Y pixels,
  ; one U pixel, and one V pixel.
  ; WIDTH has already been halved, so "width & 2" becomes a test of bit 0
  ; (written as 2 / 2). That bit is known to be set when we subtract, so the
  ; byte-sized subtraction cannot borrow from the upper bits.
  test      WIDTHb, 2 / 2
  jz        .convert_four_pixels
  sub       WIDTHb, 2 / 2

  ; Read two ARGB (or RGB) pixels.
  READ_ARGB xmm0, 2

  ; Calculate y[0] and y[1] from two RGB pixels read above.
  CALC_Y    xmm1, xmm0
  movd      TEMPd, xmm1
  mov       WORD [Yq + WIDTHq * 2], TEMPw

  ; Skip calculating u and v if the output buffer is NULL.
  test      Uq, Uq
  jz        .convert_four_pixels

  ; Calculate u[0] from two RGB pixels read above. (For details, read the above
  ; comment in .convert_one_pixel).
  INIT_UV   TEMPd, Uq, 2
  CALC_UV   xmm1, xmm0, XMM_CONST_U, TEMPd
  movd      TEMPd, xmm1
  mov       BYTE [Uq + WIDTHq], TEMPb

  ; Calculate v[0] from two RGB pixels read above.
  INIT_UV   TEMPd, Vq, 2
  CALC_UV   xmm1, xmm0, XMM_CONST_V, TEMPd
  movd      TEMPd, xmm1
  mov       BYTE [Vq + WIDTHq], TEMPb

.convert_four_pixels:
  ; Read four ARGB pixels and convert their colorspace to YUV. This code stores
  ; four Y pixels, two U pixels, and two V pixels.
  ; Nothing is left to do when the remaining (halved) width is zero.
  test      WIDTHq, WIDTHq
  jz        .convert_finish

%if PIXELSIZE == 4
  ; Check if the input buffer is aligned to a 16-byte boundary and use movdqa
  ; for reading the ARGB pixels. Only the low four address bits matter, so
  ; testing the 16-bit view of the pointer is sufficient.
  test      ARGBw, 15
  jnz       .convert_four_pixels_unaligned

.convert_four_pixels_aligned:
  ; Four pixels correspond to two units of the halved width.
  sub       WIDTHq, 4 / 2

  ; Read four ARGB pixels. (We can use movdqa here since we have checked if the
  ; source address is aligned.) WIDTH is a multiple of two at this point, so
  ; the byte offset WIDTH * 4 * 2 is a multiple of 16 and keeps the alignment.
  movdqa    xmm0, DQWORD [ARGBq + WIDTHq * 4 * 2]

  ; Calculate y[0], y[1], y[2], and y[3] from the input ARGB pixels.
  CALC_Y    xmm1, xmm0
  movd      DWORD [Yq + WIDTHq * 2], xmm1

%if SUBSAMPLING == 0
  ; Skip calculating u and v if the output buffer is NULL, which means we are
  ; converting an odd line. (When we enable subsampling, these buffers must
  ; contain the u and v values for the previous call, i.e. these variables must
  ; not be NULL.)
  test      Uq, Uq
  jz        .convert_four_pixels_aligned_next
%endif

  ; Calculate u[0] and u[1] from four ARGB pixels read above.
  INIT_UV   TEMPd, Uq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_U, TEMPd
  movd      TEMPd, xmm1
  mov       WORD [Uq + WIDTHq], TEMPw

  ; Calculate v[0] and v[1] from four ARGB pixels read above.
  INIT_UV   TEMPd, Vq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_V, TEMPd
  movd      TEMPd, xmm1
  mov       WORD [Vq + WIDTHq], TEMPw

%if SUBSAMPLING == 0
.convert_four_pixels_aligned_next:
%endif

  ; Loop until every group of four pixels has been converted.
  test      WIDTHq, WIDTHq
  jnz       .convert_four_pixels_aligned

  jmp       .convert_finish
%endif

.convert_four_pixels_unaligned:
  ; Four pixels correspond to two units of the halved width.
  sub       WIDTHq, 4 / 2

  ; Read four ARGB (or RGB) pixels.
  READ_ARGB xmm0, 4

  ; Calculate y[0], y[1], y[2], and y[3] from the input ARGB pixels.
  CALC_Y    xmm1, xmm0
  movd      DWORD [Yq + WIDTHq * 2], xmm1

%if SUBSAMPLING == 0
  ; Skip calculating u and v if the output buffer is NULL.
  test      Uq, Uq
  jz        .convert_four_pixels_unaligned_next
%endif

  ; Calculate u[0] and u[1] from the input ARGB pixels.
  INIT_UV   TEMPd, Uq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_U, TEMPd
  movd      TEMPd, xmm1
  mov       WORD [Uq + WIDTHq], TEMPw

  ; Calculate v[0] and v[1] from the input ARGB pixels.
  INIT_UV   TEMPd, Vq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_V, TEMPd
  movd      TEMPd, xmm1
  mov       WORD [Vq + WIDTHq], TEMPw

%if SUBSAMPLING == 0
.convert_four_pixels_unaligned_next:
%endif

  ; Loop until every group of four pixels has been converted.
  test      WIDTHq, WIDTHq
  jnz       .convert_four_pixels_unaligned

.convert_finish:
  ; Just exit this function since this is a void function.
  RET