2 * Copyright 2000-2013 Intel Corporation All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 * Zhao Yakui <yakui.zhao@intel.com>
21 // Module name: common.inc
23 // Common header file for all Video-Processing kernels
26 .default_execution_size (16) // default execution size: 16 channels
27 .default_register_type :ub // default register type: unsigned byte
32 //========== Common constants ==========
35 //========== Macros ==========
38 //Fast Jump, For more details see "Set_Layer_N.asm"
41 //========== Defines ====================
43 //========== Static Parameters (Common To All) ==========
49 // e.g. byte0 byte1 byte2
53 //Color Pipe (IECP) parameters
61 // e.g. byte0 byte1 byte2
66 //========== Inline parameters (Common To All) ===========
69 //============== Binding Index Table===========
70 //Common between DNDI and DNUV
73 //================= Common Message Descriptor =====
74 // Message descriptor for thread spawning
75 // Message Descriptors
76 // = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
78 // 0001(Spawn a root thread),0001 (Root thread spawn thread)
80 // Thread Spawner Message Descriptor
83 // Message descriptor for atomic operation add
84 // Message Descriptors
85 // = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
86 // 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
87 // 0000,0000 (Binding table index, added later)
90 // Atomic Operation Add Message Descriptor
93 // Message descriptor for dataport media write
94 // Message Descriptors
95 // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
96 // 1 (header present 1) 0 1010 (media block write) 000000
97 // 00000000 (binding table index - set later)
101 // Message Length defines
104 // Response Length defines
107 // Block Width and Height Size defines
110 // Extended Message Descriptors
113 // Common message descriptors:
116 //===================== Math Function Control ===================================
119 //============ Message Registers ===============
120 // buf4 starts from r28
123 //#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
126 .declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // unsigned-byte view of the region based at r30
127 .declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // unsigned-word view of the same base
128 .declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // unsigned-dword view of the same base
129 .declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view of the same base; NOTE: r30 is also the base of fBUFFER_G declared later in this file
131 //=================== End of thread instruction ===========================
134 //=====================Pointers Used=====================================
137 //=======================================================================
141 // Define temp space for any usages
147 // temp space for rotation
149 .declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of the scratch area based at r9.0
151 .declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned-dword view of the same base
153 .declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned-word view of the same base
155 .declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned-byte view of the same base
157 .declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // byte view that touches every 4th byte (source and destination stride of 4)
163 // Module name: YUV_to_RGB.asm
165 // Convert YUV to RGB, handle it by 16x4 block
169 // Description: Includes all definitions explicit to Fast Composite.
172 //========== GRF partition ==========
173 // r0 header : r0 (1 GRF)
174 // Static parameters : r1 - r6 (6 GRFS)
175 // Inline parameters : r7 - r8 (2 GRFs)
176 // MSGSRC : r27 (1 GRF)
177 //===================================
180 //========== Static Parameters (Explicit To Fast Composite) ==========
185 .declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud // unsigned-dword view starting at r1.0 (r1 is the first static-parameter GRF per the partition comment above)
202 // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
204 .declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub // byte at r2.20; the <0;1,0> source region repeats that single byte for every channel
207 //Normalised Ratio of Horizontal step size with main video for all layers
210 //Normalised Ratio of Horizontal step size with main video for all layers becomes
211 //Normalised Horizontal step size for all layers in VP_Setup.asm
215 //Normalised Vertical step size for all layers
219 //Normalised Vertical Frame Origin for all layers
223 //Normalised Horizontal Frame Origin for all layers
226 //========== Inline Parameters (Explicit To Fast Composite) ==========
232 //====================== Binding table (Explicit To Fast Composite)=========================================
235 //Used by Interlaced Scaling Kernels
238 //========== Sampler State Table Index (Explicit To Fast Composite)==========
239 //Sampler Index for AVS/IEF messages
242 //Sampler Index for SIMD16 sampler messages
245 //=============================================================================
247 .declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of buffer 0 (base r64)
248 .declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of buffer 1 (base r80, 16 registers after buffer 0)
249 .declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of buffer 2 (base r96)
250 .declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of buffer 3 (base r112)
252 .declare bBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned-byte view of buffer 0 (same base as fBUFFER_0)
253 .declare bBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned-byte view of buffer 1
254 .declare bBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned-byte view of buffer 2
255 .declare bBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned-byte view of buffer 3
257 //Pointer to mask reg
263 //Always keep Channel Pointers and Offsets in same GRF, so that we can use
264 // NODDCLR, NODDCHK flags. -rT
267 .declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF (r18), unsigned-dword view; not referenced by the instructions visible in this file
272 .declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF (r19), unsigned-dword view
277 .declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF (r20), unsigned-word view
281 .declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF (r21), unsigned-word view
286 //Always keep Channel Pointers and Offsets in same GRF, so that we can use
287 // NODDCLR, NODDCHK flags. -rT
290 //Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
291 //sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
308 //defines to generate LABELS during compile time.
311 //Msg payload buffers; up to 4 full-size messages can be written
313 //The MSGPayLoad is not needed here, so it is temporarily used for the conversion of YUV->RGB
315 .declare fBUFFER_R Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // float R results; the code below writes 16 floats starting here
316 .declare fBUFFER_G Base=r30.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // float G results (same base register as the *MSGPAYLOAD views)
317 .declare fBUFFER_B Base=r32.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // float B results
319 .declare fBUFFER_Y Base=r36.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // float Y inputs; reused by the code below as scratch for the clamped R
320 .declare fBUFFER_U Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // float U inputs; reused as scratch for the clamped G
321 .declare fBUFFER_V Base=r40.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // float V inputs; reused as scratch for the clamped B
324 .declare wTempR Base=r42.0 ElementSize=2 Type=w // signed-word staging area for R after float -> integer conversion
325 .declare wTempG Base=r44.0 ElementSize=2 Type=w // signed-word staging area for G
326 .declare wTempB Base=r46.0 ElementSize=2 Type=w // signed-word staging area for B
328 .declare ubTempR Base=r42.0 ElementSize=1 Type=ub // byte view of wTempR, used to pick single bytes out of the staged words
329 .declare ubTempG Base=r44.0 ElementSize=1 Type=ub // byte view of wTempG
330 .declare ubTempB Base=r46.0 ElementSize=1 Type=ub // byte view of wTempB
332 // the r17 register (nTEMP0) is originally defined from "Common.inc"
333 // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming
335 .declare wTemp0 Base=r17.0 ElementSize=2 Type=uw // NOTE(review): name has no "SAVE_RGB" suffix and Type is uw despite the 'w' prefix; not referenced by the instructions visible in this file - confirm intent
340 // R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255))
341 // G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255))
342 // B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255))
343 // ITU-R conversion; this is the form the code below implements:
344 // R = clip( Y + 1.402*(Cr-128)) // ITU-R
345 // G = clip( Y - 0.344*(Cb-128) - 0.714*(Cr-128))
346 // B = clip( Y + 1.772*(Cb-128))
348 // At the save module we have all 8 address sub-registers available.
349 // So we will use PING-PONG type of scheme to save the data using
350 // pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help
351 // reduce dependency. - rT
353 //wBUFF_CHNL_PTR points to either buffer 0 or buffer 4.
354 //Add appropriate offsets to get pointers for all buffers (1,2,3 or 5).
355 //Offsets are zero for buffer 0 and buffer 4.
356 //Y/U/V is also stored as R/G/B for the internal purpose
358 mov (4) a0.0<1>:uw r22.0<4;4,1>:uw // a0.0..a0.3 = four uw values from r22.0; a0.1/a0.2/a0.0 are used below as the Y/U/V pointers
359 //Block 0, line 1 of 4: convert 16 Y/U/V samples to R/G/B in place (ITU-R coefficients)
360 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub // Y[0..7]: bytes 1,3,..,15 at a0.1, ub -> f
361 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub // Y[8..15]: bytes 17,19,..,31
362 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub // U[0..7] at a0.2
363 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub // U[8..15]
364 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub // V[0..7] at a0.0
365 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub // V[8..15]
367 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f // U -= 128
368 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f // V -= 128
370 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
371 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f // R = acc0 + 1.402*V
373 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
374 mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f // acc0 += -0.344*U
375 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f // G = acc0 - 0.714*V
377 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
378 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f // B = acc0 + 1.772*U
380 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f // clamp: sat(R/255) in [0,1], kept in the Y buffer
381 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f // sat(G/255), kept in the U buffer
382 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f // sat(B/255), kept in the V buffer
384 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f // R = clamped value * 255
385 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
386 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
388 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> // f -> w, one word per 4-byte slot (dst stride 2)
389 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
390 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
392 mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4> // byte 0 of each slot -> R replaces Y at a0.1
393 mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4> // G replaces U at a0.2
394 mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4> // B replaces V at a0.0
396 //Block 0, line 2 of 4: same Y/U/V -> R/G/B conversion for the 16 samples at byte offsets 33..63 from the pointers in a0
398 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub // Y[0..7]: bytes 33,35,..,47 at a0.1, ub -> f
399 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub // Y[8..15]: bytes 49,51,..,63
400 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub // U[0..7] at a0.2
401 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub // U[8..15]
402 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub // V[0..7] at a0.0
403 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub // V[8..15]
405 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f // U -= 128
406 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f // V -= 128
408 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
409 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f // R = acc0 + 1.402*V
411 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
412 mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f // acc0 += -0.344*U
413 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f // G = acc0 - 0.714*V
415 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
416 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f // B = acc0 + 1.772*U
418 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f // clamp: sat(R/255) in [0,1], kept in the Y buffer
419 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f // sat(G/255), kept in the U buffer
420 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f // sat(B/255), kept in the V buffer
422 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f // R = clamped value * 255
423 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
424 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
427 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> // f -> w, one word per 4-byte slot (dst stride 2)
428 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
429 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
430 mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4> // byte 0 of each slot -> R replaces Y at a0.1
431 mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4> // G replaces U at a0.2
432 mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4> // B replaces V at a0.0
434 //Block 0, line 3 of 4: same Y/U/V -> R/G/B conversion for the 16 samples at byte offsets 65..95 from the pointers in a0
435 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub // Y[0..7]: bytes 65,67,..,79 at a0.1, ub -> f
436 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub // Y[8..15]: bytes 81,83,..,95
437 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub // U[0..7] at a0.2
438 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub // U[8..15]
439 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub // V[0..7] at a0.0
440 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub // V[8..15]
442 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f // U -= 128
443 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f // V -= 128
445 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
446 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f // R = acc0 + 1.402*V
448 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
449 mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f // acc0 += -0.344*U
450 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f // G = acc0 - 0.714*V
452 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
453 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f // B = acc0 + 1.772*U
455 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f // clamp: sat(R/255) in [0,1], kept in the Y buffer
456 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f // sat(G/255), kept in the U buffer
457 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f // sat(B/255), kept in the V buffer
459 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f // R = clamped value * 255
460 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
461 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
463 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> // f -> w, one word per 4-byte slot (dst stride 2)
464 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
465 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
466 mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4> // byte 0 of each slot -> R replaces Y at a0.1
467 mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4> // G replaces U at a0.2
468 mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4> // B replaces V at a0.0
470 //Block 0, line 4 of 4: same Y/U/V -> R/G/B conversion for the 16 samples at byte offsets 97..127 from the pointers in a0
471 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub // Y[0..7]: bytes 97,99,..,111 at a0.1, ub -> f
472 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub // Y[8..15]: bytes 113,115,..,127
473 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub // U[0..7] at a0.2
474 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub // U[8..15]
475 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub // V[0..7] at a0.0
476 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub // V[8..15]
478 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f // U -= 128
479 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f // V -= 128
481 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
482 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f // R = acc0 + 1.402*V
484 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
485 mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f // acc0 += -0.344*U
486 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f // G = acc0 - 0.714*V
488 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
489 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f // B = acc0 + 1.772*U
491 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f // clamp: sat(R/255) in [0,1], kept in the Y buffer
492 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f // sat(G/255), kept in the U buffer
493 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f // sat(B/255), kept in the V buffer
495 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f // R = clamped value * 255
496 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
497 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
499 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> // f -> w, one word per 4-byte slot (dst stride 2)
500 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
501 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
502 mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4> // byte 0 of each slot -> R replaces Y at a0.1
503 mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4> // G replaces U at a0.2
504 mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4> // B replaces V at a0.0
508 add (4) a0.0<1>:uw r22.0<4;4,1>:uw 512:uw // a0.0..a0.3 = the four uw values at r22.0, each + 512: Y/U/V pointers for block 1
509 //Block 1, line 1 of 4: convert 16 Y/U/V samples to R/G/B in place (ITU-R coefficients)
510 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub // Y[0..7]: bytes 1,3,..,15 at a0.1, ub -> f
511 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub // Y[8..15]: bytes 17,19,..,31
512 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub // U[0..7] at a0.2
513 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub // U[8..15]
514 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub // V[0..7] at a0.0
515 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub // V[8..15]
517 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f // U -= 128
518 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f // V -= 128
520 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
521 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f // R = acc0 + 1.402*V
523 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
524 mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f // acc0 += -0.344*U
525 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f // G = acc0 - 0.714*V
527 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
528 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f // B = acc0 + 1.772*U
530 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f // clamp: sat(R/255) in [0,1], kept in the Y buffer
531 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f // sat(G/255), kept in the U buffer
532 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f // sat(B/255), kept in the V buffer
534 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f // R = clamped value * 255
535 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
536 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
538 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> // f -> w, one word per 4-byte slot (dst stride 2)
539 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
540 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
542 mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4> // byte 0 of each slot -> R replaces Y at a0.1
543 mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4> // G replaces U at a0.2
544 mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4> // B replaces V at a0.0
546 //Block 1, line 2 of 4: same Y/U/V -> R/G/B conversion for the 16 samples at byte offsets 33..63 from the pointers in a0
548 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub // Y[0..7]: bytes 33,35,..,47 at a0.1, ub -> f
549 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub // Y[8..15]: bytes 49,51,..,63
550 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub // U[0..7] at a0.2
551 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub // U[8..15]
552 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub // V[0..7] at a0.0
553 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub // V[8..15]
555 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f // U -= 128
556 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f // V -= 128
558 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
559 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f // R = acc0 + 1.402*V
561 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
562 mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f // acc0 += -0.344*U
563 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f // G = acc0 - 0.714*V
565 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
566 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f // B = acc0 + 1.772*U
568 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f // clamp: sat(R/255) in [0,1], kept in the Y buffer
569 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f // sat(G/255), kept in the U buffer
570 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f // sat(B/255), kept in the V buffer
572 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f // R = clamped value * 255
573 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
574 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
577 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> // f -> w, one word per 4-byte slot (dst stride 2)
578 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
579 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
580 mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4> // byte 0 of each slot -> R replaces Y at a0.1
581 mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4> // G replaces U at a0.2
582 mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4> // B replaces V at a0.0
584 //Block 1, line 3 of 4: same Y/U/V -> R/G/B conversion for the 16 samples at byte offsets 65..95 from the pointers in a0
585 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub // Y[0..7]: bytes 65,67,..,79 at a0.1, ub -> f
586 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub // Y[8..15]: bytes 81,83,..,95
587 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub // U[0..7] at a0.2
588 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub // U[8..15]
589 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub // V[0..7] at a0.0
590 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub // V[8..15]
592 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f // U -= 128
593 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f // V -= 128
595 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
596 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f // R = acc0 + 1.402*V
598 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
599 mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f // acc0 += -0.344*U
600 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f // G = acc0 - 0.714*V
602 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
603 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f // B = acc0 + 1.772*U
605 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f // clamp: sat(R/255) in [0,1], kept in the Y buffer
606 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f // sat(G/255), kept in the U buffer
607 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f // sat(B/255), kept in the V buffer
609 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f // R = clamped value * 255
610 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
611 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
613 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> // f -> w, one word per 4-byte slot (dst stride 2)
614 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
615 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
616 mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4> // byte 0 of each slot -> R replaces Y at a0.1
617 mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4> // G replaces U at a0.2
618 mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4> // B replaces V at a0.0
620 //Block 1, line 4 of 4: same Y/U/V -> R/G/B conversion for the 16 samples at byte offsets 97..127 from the pointers in a0
621 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub // Y[0..7]: bytes 97,99,..,111 at a0.1, ub -> f
622 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub // Y[8..15]: bytes 113,115,..,127
623 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub // U[0..7] at a0.2
624 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub // U[8..15]
625 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub // V[0..7] at a0.0
626 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub // V[8..15]
628 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f // U -= 128
629 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f // V -= 128
631 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
632 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f // R = acc0 + 1.402*V
634 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
635 mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f // acc0 += -0.344*U
636 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f // G = acc0 - 0.714*V
638 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
639 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f // B = acc0 + 1.772*U
641 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f // clamp: sat(R/255) in [0,1], kept in the Y buffer
642 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f // sat(G/255), kept in the U buffer
643 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f // sat(B/255), kept in the V buffer
645 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f // R = clamped value * 255
646 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
647 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
649 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> // f -> w, one word per 4-byte slot (dst stride 2)
650 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
651 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
652 mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4> // byte 0 of each slot -> R replaces Y at a0.1
653 mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4> // G replaces U at a0.2
654 mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4> // B replaces V at a0.0
658 add (4) a0.0<1>:uw r22.0<4;4,1>:uw 1024:uw // a0.0..a0.3 = the four uw values at r22.0, each + 1024: Y/U/V pointers for block 2
659 //Block 2, line 1 of 4: convert 16 Y/U/V samples to R/G/B in place (ITU-R coefficients)
660 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub // Y[0..7]: bytes 1,3,..,15 at a0.1, ub -> f
661 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub // Y[8..15]: bytes 17,19,..,31
662 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub // U[0..7] at a0.2
663 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub // U[8..15]
664 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub // V[0..7] at a0.0
665 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub // V[8..15]
667 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f // U -= 128
668 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f // V -= 128
670 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
671 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f // R = acc0 + 1.402*V
673 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
674 mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f // acc0 += -0.344*U
675 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f // G = acc0 - 0.714*V
677 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
678 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f // B = acc0 + 1.772*U
680 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f // clamp: sat(R/255) in [0,1], kept in the Y buffer
681 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f // sat(G/255), kept in the U buffer
682 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f // sat(B/255), kept in the V buffer
684 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f // R = clamped value * 255
685 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
686 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
688 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> // f -> w, one word per 4-byte slot (dst stride 2)
689 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
690 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
692 mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4> // byte 0 of each slot -> R replaces Y at a0.1
693 mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4> // G replaces U at a0.2
694 mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4> // B replaces V at a0.0
696 //Block 2, line 2 of 4: same Y/U/V -> R/G/B conversion for the 16 samples at byte offsets 33..63 from the pointers in a0
698 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub // Y[0..7]: bytes 33,35,..,47 at a0.1, ub -> f
699 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub // Y[8..15]: bytes 49,51,..,63
700 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub // U[0..7] at a0.2
701 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub // U[8..15]
702 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub // V[0..7] at a0.0
703 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub // V[8..15]
705 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f // U -= 128
706 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f // V -= 128
708 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
709 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f // R = acc0 + 1.402*V
711 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
712 mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f // acc0 += -0.344*U
713 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f // G = acc0 - 0.714*V
715 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
716 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f // B = acc0 + 1.772*U
718 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f // clamp: sat(R/255) in [0,1], kept in the Y buffer
719 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f // sat(G/255), kept in the U buffer
720 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f // sat(B/255), kept in the V buffer
722 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f // R = clamped value * 255
723 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
724 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
727 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> // f -> w, one word per 4-byte slot (dst stride 2)
728 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
729 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
730 mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4> // byte 0 of each slot -> R replaces Y at a0.1
731 mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4> // G replaces U at a0.2
732 mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4> // B replaces V at a0.0
734 //Block 2, line 3 of 4: same Y/U/V -> R/G/B conversion for the 16 samples at byte offsets 65..95 from the pointers in a0
735 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub // Y[0..7]: bytes 65,67,..,79 at a0.1, ub -> f
736 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub // Y[8..15]: bytes 81,83,..,95
737 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub // U[0..7] at a0.2
738 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub // U[8..15]
739 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub // V[0..7] at a0.0
740 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub // V[8..15]
742 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f // U -= 128
743 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f // V -= 128
745 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
746 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f // R = acc0 + 1.402*V
748 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
749 mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f // acc0 += -0.344*U
750 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f // G = acc0 - 0.714*V
752 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
753 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f // B = acc0 + 1.772*U
755 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f // clamp: sat(R/255) in [0,1], kept in the Y buffer
756 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f // sat(G/255), kept in the U buffer
757 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f // sat(B/255), kept in the V buffer
759 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f // R = clamped value * 255
760 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
761 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
763 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> // f -> w, one word per 4-byte slot (dst stride 2)
764 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
765 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
766 mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4> // byte 0 of each slot -> R replaces Y at a0.1
767 mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4> // G replaces U at a0.2
768 mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4> // B replaces V at a0.0
770 //Block 2, line 4 of 4: same Y/U/V -> R/G/B conversion for the 16 samples at byte offsets 97..127 from the pointers in a0
771 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub // Y[0..7]: bytes 97,99,..,111 at a0.1, ub -> f
772 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub // Y[8..15]: bytes 113,115,..,127
773 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub // U[0..7] at a0.2
774 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub // U[8..15]
775 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub // V[0..7] at a0.0
776 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub // V[8..15]
778 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f // U -= 128
779 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f // V -= 128
781 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
782 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f // R = acc0 + 1.402*V
784 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
785 mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f // acc0 += -0.344*U
786 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f // G = acc0 - 0.714*V
788 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
789 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f // B = acc0 + 1.772*U
791 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f // clamp: sat(R/255) in [0,1], kept in the Y buffer
792 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f // sat(G/255), kept in the U buffer
793 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f // sat(B/255), kept in the V buffer
795 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f // R = clamped value * 255
796 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
797 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
799 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> // f -> w, one word per 4-byte slot (dst stride 2)
800 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
801 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
802 mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4> // byte 0 of each slot -> R replaces Y at a0.1
803 mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4> // G replaces U at a0.2
804 mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4> // B replaces V at a0.0
808 add (4) a0.0<1>:uw r22.0<4;4,1>:uw 1536:uw // a0.0..a0.3 = the four uw values at r22.0, each + 1536: Y/U/V pointers for block 3
809 //Block 3, line 1 of 4: convert 16 Y/U/V samples to R/G/B in place (ITU-R coefficients)
810 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub // Y[0..7]: bytes 1,3,..,15 at a0.1, ub -> f
811 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub // Y[8..15]: bytes 17,19,..,31
812 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub // U[0..7] at a0.2
813 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub // U[8..15]
814 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub // V[0..7] at a0.0
815 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub // V[8..15]
817 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f // U -= 128
818 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f // V -= 128
820 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
821 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f // R = acc0 + 1.402*V
823 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
824 mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f // acc0 += -0.344*U
825 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f // G = acc0 - 0.714*V
827 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
828 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f // B = acc0 + 1.772*U
830 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f // clamp: sat(R/255) in [0,1], kept in the Y buffer
831 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f // sat(G/255), kept in the U buffer
832 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f // sat(B/255), kept in the V buffer
834 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f // R = clamped value * 255
835 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
836 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
838 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> // f -> w, one word per 4-byte slot (dst stride 2)
839 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
840 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
842 mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4> // byte 0 of each slot -> R replaces Y at a0.1
843 mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4> // G replaces U at a0.2
844 mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4> // B replaces V at a0.0
846 //Block 3, line 2 of 4: same Y/U/V -> R/G/B conversion for the 16 samples at byte offsets 33..63 from the pointers in a0
848 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub // Y[0..7]: bytes 33,35,..,47 at a0.1, ub -> f
849 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub // Y[8..15]: bytes 49,51,..,63
850 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub // U[0..7] at a0.2
851 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub // U[8..15]
852 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub // V[0..7] at a0.0
853 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub // V[8..15]
855 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f // U -= 128
856 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f // V -= 128
858 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
859 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f // R = acc0 + 1.402*V
861 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
862 mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f // acc0 += -0.344*U
863 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f // G = acc0 - 0.714*V
865 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
866 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f // B = acc0 + 1.772*U
868 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f // clamp: sat(R/255) in [0,1], kept in the Y buffer
869 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f // sat(G/255), kept in the U buffer
870 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f // sat(B/255), kept in the V buffer
872 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f // R = clamped value * 255
873 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
874 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
877 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> // f -> w, one word per 4-byte slot (dst stride 2)
878 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
879 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
880 mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4> // byte 0 of each slot -> R replaces Y at a0.1
881 mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4> // G replaces U at a0.2
882 mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4> // B replaces V at a0.0
884 //Block 3, line 3 of 4: same Y/U/V -> R/G/B conversion for the 16 samples at byte offsets 65..95 from the pointers in a0
885 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub // Y[0..7]: bytes 65,67,..,79 at a0.1, ub -> f
886 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub // Y[8..15]: bytes 81,83,..,95
887 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub // U[0..7] at a0.2
888 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub // U[8..15]
889 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub // V[0..7] at a0.0
890 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub // V[8..15]
892 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f // U -= 128
893 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f // V -= 128
895 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
896 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f // R = acc0 + 1.402*V
898 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
899 mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f // acc0 += -0.344*U
900 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f // G = acc0 - 0.714*V
902 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
903 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f // B = acc0 + 1.772*U
905 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f // clamp: sat(R/255) in [0,1], kept in the Y buffer
906 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f // sat(G/255), kept in the U buffer
907 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f // sat(B/255), kept in the V buffer
909 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f // R = clamped value * 255
910 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
911 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
913 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> // f -> w, one word per 4-byte slot (dst stride 2)
914 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
915 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
916 mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4> // byte 0 of each slot -> R replaces Y at a0.1
917 mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4> // G replaces U at a0.2
918 mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4> // B replaces V at a0.0
920 //Block 3, line 4 of 4: same Y/U/V -> R/G/B conversion for the 16 samples at byte offsets 97..127 from the pointers in a0
921 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub // Y[0..7]: bytes 97,99,..,111 at a0.1, ub -> f
922 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub // Y[8..15]: bytes 113,115,..,127
923 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub // U[0..7] at a0.2
924 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub // U[8..15]
925 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub // V[0..7] at a0.0
926 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub // V[8..15]
928 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f // U -= 128
929 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f // V -= 128
931 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
932 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f // R = acc0 + 1.402*V
934 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
935 mac (16) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f // acc0 += -0.344*U
936 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f // G = acc0 - 0.714*V
938 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> // acc0 = Y
939 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f // B = acc0 + 1.772*U
941 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f // clamp: sat(R/255) in [0,1], kept in the Y buffer
942 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f // sat(G/255), kept in the U buffer
943 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f // sat(B/255), kept in the V buffer
945 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f // R = clamped value * 255
946 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
947 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
949 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> // f -> w, one word per 4-byte slot (dst stride 2)
950 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
951 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
952 mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4> // byte 0 of each slot -> R replaces Y at a0.1
953 mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4> // G replaces U at a0.2
954 mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4> // B replaces V at a0.0