gst/videomixer/blendorc.orc

   1 .function orc_splat_u32
     # Fill: every 4-byte element of the destination array d1 is set to the
     # 4-byte parameter p1.
   2 .dest 4 d1 guint32
   3 .param 4 p1 guint32
   4
     # d1[i] = p1
   5 copyl d1, p1
   6
   7 .function orc_memcpy_u32
     # Copy: every 4-byte element of the source array s1 is copied to the
     # matching element of the destination array d1.
   8 .dest 4 d1 guint32
   9 .source 4 s1 guint32
  10
     # d1[i] = s1[i]
  11 copyl d1, s1
  12
  13 .function orc_blend_u8
     # Blend 8-bit source samples s1 into the 8-bit destination d1, weighted
     # by the 16-bit parameter p1. Computed in 16-bit words:
     #   d1 = ((d1 << 8) + (s1 - d1) * p1) >> 8
     # NOTE(review): p1 looks like a 0..256 weight (256 = take the source
     # sample unchanged) -- confirm against the caller.
  14 .flags 2d
  15 .dest 1 d1 guint8
  16 .source 1 s1 guint8
  17 .param 2 p1
  18 .temp 2 dst_w
  19 .temp 2 acc_w
  20 .const 1 shift 8
  21
     # widen both samples to 16 bits
  22 convubw dst_w, d1
  23 convubw acc_w, s1
     # acc_w = (s1 - d1) * p1   (mullw keeps the low 16 bits of the product)
  24 subw acc_w, acc_w, dst_w
  25 mullw acc_w, acc_w, p1
     # acc_w = ((d1 << 8) + acc_w) >> 8
  26 shlw dst_w, dst_w, shift
  27 addw acc_w, dst_w, acc_w
  28 shruw acc_w, acc_w, shift
     # narrow back to a byte and store
  29 convsuswb d1, acc_w
  30
  31
  32 .function orc_blend_argb
     # Blend 4-byte source pixels s over 4-byte destination pixels d.
     # The per-pixel source alpha is taken from the lowest byte of the 32-bit
     # pixel word and scaled by the parameter alpha (>> 8). Each of the four
     # bytes is then updated as
     #   d = d + ((s - d) * a) / 255
     # and the alpha byte of the result is forced to 0xff.
  33 .flags 2d
  34 .dest 4 d guint8
  35 .source 4 s guint8
  36 .param 2 alpha
  37 .temp 4 pix
  38 .temp 2 pix_w
  39 .temp 1 alpha_b
  40 .temp 4 alpha4
  41 .temp 8 dst16
  42 .temp 8 src16
  43 .temp 8 alpha16
  44 .const 4 alpha_mask 0x000000ff
  45
     # pull the alpha byte (lowest byte of the word) out of the source pixel
     # and replicate it into all four byte lanes
  46 loadl pix, s
  47 convlw pix_w, pix
  48 convwb alpha_b, pix_w
  49 splatbl alpha4, alpha_b
     # alpha16 = (source alpha * alpha) >> 8, one 16-bit value per lane
  50 x4 convubw alpha16, alpha4
  51 x4 mullw alpha16, alpha16, alpha
  52 x4 shruw alpha16, alpha16, 8
     # widen source and destination bytes to 16 bits per lane
  53 x4 convubw src16, pix
  54 loadl pix, d
  55 x4 convubw dst16, pix
     # dst16 += ((src16 - dst16) * alpha16) / 255
  56 x4 subw src16, src16, dst16
  57 x4 mullw src16, src16, alpha16
  58 x4 div255w src16, src16
  59 x4 addw dst16, dst16, src16
     # narrow to bytes, set the alpha byte to 0xff and store
  60 x4 convwb pix, dst16
  61 orl pix, pix, alpha_mask
  62 storel d, pix
  63
  64 .function orc_blend_bgra
     # Blend 4-byte source pixels s over 4-byte destination pixels d.
     # Same arithmetic as the ARGB variant, except that the per-pixel alpha
     # is taken from the highest byte of the 32-bit pixel word (bits 24..31).
     # Each of the four bytes is updated as
     #   d = d + ((s - d) * a) / 255
     # and the alpha byte of the result is forced to 0xff.
  65 .flags 2d
  66 .dest 4 d guint8
  67 .source 4 s guint8
  68 .param 2 alpha
  69 .temp 4 pix
  70 .temp 4 pix_hi
  71 .temp 2 pix_w
  72 .temp 1 alpha_b
  73 .temp 4 alpha4
  74 .temp 8 dst16
  75 .temp 8 src16
  76 .temp 8 alpha16
  77 .const 4 alpha_mask 0xff000000
  78
     # shift the top byte of the source pixel down, narrow it to a byte and
     # replicate it into all four byte lanes
  79 loadl pix, s
  80 shrul pix_hi, pix, 24
  81 convlw pix_w, pix_hi
  82 convwb alpha_b, pix_w
  83 splatbl alpha4, alpha_b
     # alpha16 = (source alpha * alpha) >> 8, one 16-bit value per lane
  84 x4 convubw alpha16, alpha4
  85 x4 mullw alpha16, alpha16, alpha
  86 x4 shruw alpha16, alpha16, 8
     # widen source and destination bytes to 16 bits per lane
  87 x4 convubw src16, pix
  88 loadl pix, d
  89 x4 convubw dst16, pix
     # dst16 += ((src16 - dst16) * alpha16) / 255
  90 x4 subw src16, src16, dst16
  91 x4 mullw src16, src16, alpha16
  92 x4 div255w src16, src16
  93 x4 addw dst16, dst16, src16
     # narrow to bytes, set the alpha byte to 0xff and store
  94 x4 convwb pix, dst16
  95 orl pix, pix, alpha_mask
  96 storel d, pix
  97
  98
  99 .function orc_overlay_argb
     # Composite 4-byte source pixels s over 4-byte destination pixels d,
     # writing both the new colour bytes and a new alpha byte to d.
     # The alpha of a pixel is the lowest byte of its 32-bit word (a_alpha
     # mask 0x000000ff). The parameter alpha scales the source alpha (>> 8).
     # All per-byte arithmetic is done on four 16-bit lanes (x4 prefix).
 100 .flags 2d
 101 .dest 4 d guint8
 102 .source 4 s guint8
 103 .param 2 alpha
 104 .temp 4 t
 105 .temp 2 tw
 106 .temp 1 tb
 107 .temp 8 alpha_s
 108 .temp 8 alpha_s_inv
 109 .temp 8 alpha_d
 110 .temp 4 a
 111 .temp 8 d_wide
 112 .temp 8 s_wide
 113 .const 4 xfs 0xffffffff
 114 .const 4 a_alpha 0x000000ff
 115 .const 4 a_alpha_inv 0xffffff00
 116
 117 # calc source alpha as alpha_s = alpha_s * alpha / 256
     # the low byte of the source pixel is narrowed out and replicated into
     # all four lanes before scaling; s_wide then holds pix_s * alpha_s
 118 loadl t, s
 119 convlw tw, t
 120 convwb tb, tw
 121 splatbl a, tb
 122 x4 convubw alpha_s, a
 123 x4 mullw alpha_s, alpha_s, alpha
 124 x4 shruw alpha_s, alpha_s, 8
 125 x4 convubw s_wide, t
 126 x4 mullw s_wide, s_wide, alpha_s
 127
 128 # calc destination alpha as alpha_d = (255-alpha_s) * alpha_d / 255
     # xfs widened per byte gives 255 in every lane, so alpha_s_inv is
     # 255 - alpha_s; d_wide then holds pix_d * (scaled) alpha_d
 129 loadpl a, xfs
 130 x4 convubw alpha_s_inv, a
 131 x4 subw alpha_s_inv, alpha_s_inv, alpha_s
 132 loadl t, d
 133 convlw tw, t
 134 convwb tb, tw
 135 splatbl a, tb
 136 x4 convubw alpha_d, a
 137 x4 mullw alpha_d, alpha_d, alpha_s_inv
 138 x4 div255w alpha_d, alpha_d
 139 x4 convubw d_wide, t
 140 x4 mullw d_wide, d_wide, alpha_d
 141
 142 # calc final pixel as pix_d = pix_s*alpha_s + pix_d*alpha_d*(255-alpha_s)/255
 143 x4 addw d_wide, d_wide, s_wide
 144
 145 # calc the final destination alpha_d = alpha_s + alpha_d * (255-alpha_s)/255
 146 x4 addw alpha_d, alpha_d, alpha_s
 147
 148 # now normalize the pix_d by the final alpha to make it associative
     # NOTE(review): behaviour when the final alpha is 0 depends on how the
     # divluw opcode treats a zero divisor -- confirm in the Orc opcode docs
 149 x4 divluw d_wide, d_wide, alpha_d
 150
 151 # pack the new alpha into the correct spot
     # colour bytes come from d_wide with the alpha byte masked out; the
     # alpha byte comes from alpha_d with everything else masked out
 152 x4 convwb t, d_wide
 153 andl t, t, a_alpha_inv
 154 x4 convwb a, alpha_d
 155 andl a, a, a_alpha
 156 orl t, t, a
 157 storel d, t
 158
 159 .function orc_overlay_bgra
     # Composite 4-byte source pixels s over 4-byte destination pixels d,
     # writing both the new colour bytes and a new alpha byte to d.
     # The alpha of a pixel is the highest byte of its 32-bit word (a_alpha
     # mask 0xff000000), extracted with a 24-bit right shift. The parameter
     # alpha scales the source alpha (>> 8).
     # All per-byte arithmetic is done on four 16-bit lanes (x4 prefix).
 160 .flags 2d
 161 .dest 4 d guint8
 162 .source 4 s guint8
 163 .param 2 alpha
 164 .temp 4 t
 165 .temp 4 t2
 166 .temp 2 tw
 167 .temp 1 tb
 168 .temp 8 alpha_s
 169 .temp 8 alpha_s_inv
 170 .temp 8 alpha_d
 171 .temp 4 a
 172 .temp 8 d_wide
 173 .temp 8 s_wide
 174 .const 4 xfs 0xffffffff
 175 .const 4 a_alpha 0xff000000
 176 .const 4 a_alpha_inv 0x00ffffff
 177
 178 # calc source alpha as alpha_s = alpha_s * alpha / 256
     # the top byte of the source pixel is shifted down, narrowed and
     # replicated into all four lanes before scaling; s_wide then holds
     # pix_s * alpha_s
 179 loadl t, s
 180 shrul t2, t, 24
 181 convlw tw, t2
 182 convwb tb, tw
 183 splatbl a, tb
 184 x4 convubw alpha_s, a
 185 x4 mullw alpha_s, alpha_s, alpha
 186 x4 shruw alpha_s, alpha_s, 8
 187 x4 convubw s_wide, t
 188 x4 mullw s_wide, s_wide, alpha_s
 189
 190 # calc destination alpha as alpha_d = (255-alpha_s) * alpha_d / 255
     # xfs widened per byte gives 255 in every lane, so alpha_s_inv is
     # 255 - alpha_s; d_wide then holds pix_d * (scaled) alpha_d
 191 loadpl a, xfs
 192 x4 convubw alpha_s_inv, a
 193 x4 subw alpha_s_inv, alpha_s_inv, alpha_s
 194 loadl t, d
 195 shrul t2, t, 24
 196 convlw tw, t2
 197 convwb tb, tw
 198 splatbl a, tb
 199 x4 convubw alpha_d, a
 200 x4 mullw alpha_d, alpha_d, alpha_s_inv
 201 x4 div255w alpha_d, alpha_d
 202 x4 convubw d_wide, t
 203 x4 mullw d_wide, d_wide, alpha_d
 204
 205 # calc final pixel as pix_d = pix_s*alpha_s + pix_d*alpha_d*(255-alpha_s)/255
 206 x4 addw d_wide, d_wide, s_wide
 207
 208 # calc the final destination alpha_d = alpha_s + alpha_d * (255-alpha_s)/255
 209 x4 addw alpha_d, alpha_d, alpha_s
 210
 211 # now normalize the pix_d by the final alpha to make it associative
     # NOTE(review): behaviour when the final alpha is 0 depends on how the
     # divluw opcode treats a zero divisor -- confirm in the Orc opcode docs
 212 x4 divluw d_wide, d_wide, alpha_d
 213
 214 # pack the new alpha into the correct spot
     # colour bytes come from d_wide with the alpha byte masked out; the
     # alpha byte comes from alpha_d with everything else masked out
 215 x4 convwb t, d_wide
 216 andl t, t, a_alpha_inv
 217 x4 convwb a, alpha_d
 218 andl a, a, a_alpha
 219 orl t, t, a
 220 storel d, t