2 * Copyright 2000-2013 Intel Corporation All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 * Zhao Yakui <yakui.zhao@intel.com>
21 // Module name: common.inc
23 // Common header file for all Video-Processing kernels
// Assembler-wide defaults: execution size 16 and register type :ub.
// NOTE(review): presumably these only take effect where an instruction or
// operand omits an explicit size/type - confirm against the assembler docs.
26 .default_execution_size (16)
27 .default_register_type :ub
32 //========== Common constants ==========
35 //========== Macros ==========
38 //Fast Jump, For more details see "Set_Layer_N.asm"
41 //========== Defines ====================
43 //========== Static Parameters (Common To All) ==========
49 // e.g. byte0 byte1 byte2
53 //Color Pipe (IECP) parameters
61 // e.g. byte0 byte1 byte2
66 //========== Inline parameters (Common To All) ===========
69 //============== Binding Index Table===========
70 //Common between DNDI and DNUV
73 //================= Common Message Descriptor =====
74 // Message descriptor for thread spawning
75 // Message Descriptors
76 // = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
78 // 0001(Spawn a root thread),0001 (Root thread spawn thread)
80 // Thread Spawner Message Descriptor
83 // Message descriptor for atomic operation add
84 // Message Descriptors
85 // = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
86 // 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
87 // 0000,0000 (Binding table index, added later)
90 // Atomic Operation Add Message Descriptor
93 // Message descriptor for dataport media write
94 // Message Descriptors
95 // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
96 // 1 (header present 1) 0 1010 (media block write) 000000
97 // 00000000 (binding table index - set later)
101 // Message Length defines
104 // Response Length defines
107 // Block Width and Height Size defines
110 // Extended Message Descriptors
113 // Common message descriptors:
116 //===================== Math Function Control ===================================
119 //============ Message Registers ===============
120 // buf4 starts from r28
123 //#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
// Four typed views of the same message payload registers, all based at r30:
// bytes (ub), words (uw), dwords (ud) and floats (f). The name prefix
// (mub/muw/mud/mf) mirrors the element type of each view.
126 .declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
127 .declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
128 .declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
129 .declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
131 //=================== End of thread instruction ===========================
134 //=====================Pointers Used=====================================
137 //=======================================================================
141 // Define temp space for any usages
147 // temp space for rotation
// Rotation scratch buffer: five typed views that all start at r9.0
// (float, dword, word, byte, and a byte view with 4-byte strides).
149 .declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
151 .declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
153 .declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
155 .declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
// ub4ROBUF: byte elements read and written with a horizontal stride of 4.
157 .declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
163 // Module name: YUV_to_RGB.asm
165 // Convert YUV to RGB, handle it by 16x4 block
169 // Description: Includes all definitions explicit to Fast Composite.
172 //========== GRF partition ==========
173 // r0 header : r0 (1 GRF)
174 // Static parameters : r1 - r6 (6 GRFS)
175 // Inline parameters : r7 - r8 (2 GRFs)
176 // MSGSRC : r27 (1 GRF)
177 //===================================
180 //========== Static Parameters (Explicit To Fast Composite) ==========
// Dword (ud) view starting at r1.0; per the GRF partition above, r1-r6 hold
// the static parameters.
185 .declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
202 // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
// Single byte at r2.20; the <0;1,0> source region repeats that one byte for
// every channel that reads it.
204 .declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
207 //Normalised Ratio of Horizontal step size with main video for all layers
210 //Normalised Ratio of Horizontal step size with main video for all layers becomes
211 //Normalised Horizontal step size for all layers in VP_Setup.asm
215 //Normalised Vertical step size for all layers
219 //Normalised Vertical Frame Origin for all layers
223 //Normalised Horizontal Frame Origin for all layers
226 //========== Inline Parameters (Explicit To Fast Composite) ==========
232 //====================== Binding table (Explicit To Fast Composite)=========================================
235 //Used by Interlaced Scaling Kernels
238 //========== Sampler State Table Index (Explicit To Fast Composite)==========
239 //Sampler Index for AVS/IEF messages
242 //Sampler Index for SIMD16 sampler messages
245 //=============================================================================
// Four data buffers based at r64, r80, r96 and r112 (16 registers apart).
// Each buffer is declared twice over the same registers:
//   fBUFFER_n - float view
//   bBUFFER_n - byte (ub) view
247 .declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
248 .declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
249 .declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
250 .declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
252 .declare bBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
253 .declare bBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
254 .declare bBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
255 .declare bBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
257 //Pointer to mask reg
263 //Always keep Channel Pointers and Offsets in same GRF, so that we can use
264 // NODDCLR, NODDCHK flags. -rT
// Single-register declarations: dword views at r18 and r19 (named for the
// CSC coefficients) and word views at r20 and r21 (named for the alpha mask
// and its temporary copy).
267 .declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
272 .declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
277 .declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
281 .declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
286 //Always keep Channel Pointers and Offsets in same GRF, so that we can use
287 // NODDCLR, NODDCHK flags. -rT
290 //Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
291 //sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
308 //defines to generate LABELS during compile time.
311 //Msg payload buffers; up to 4 full-size messages can be written
313 //The MSGPayLoad is not needed here, so it is temporarily used for the conversion of YUV->RGB
// Working registers for the YUV->RGB conversion below.
// Float buffers: the conversion code operates on 16 floats per buffer, i.e.
// registers n and n+1 of each base (R: r28, G: r30, B: r32, Y: r36, U: r38,
// V: r40). fBUFFER_G shares its base register r30 with the *MSGPAYLOAD
// declarations.
315 .declare fBUFFER_R Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> Type=f
316 .declare fBUFFER_G Base=r30.0 ElementSize=4 SrcRegion=<8;8,1> Type=f
317 .declare fBUFFER_B Base=r32.0 ElementSize=4 SrcRegion=<8;8,1> Type=f
319 .declare fBUFFER_Y Base=r36.0 ElementSize=4 SrcRegion=<8;8,1> Type=f
320 .declare fBUFFER_U Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=f
321 .declare fBUFFER_V Base=r40.0 ElementSize=4 SrcRegion=<8;8,1> Type=f
// Integer staging area for the converted R/G/B values: signed word views
// (wTemp*) and byte views (ubTemp*) of the same registers r42, r44 and r46.
324 .declare wTempR Base=r42.0 ElementSize=2 Type=w
325 .declare wTempG Base=r44.0 ElementSize=2 Type=w
326 .declare wTempB Base=r46.0 ElementSize=2 Type=w
328 .declare ubTempR Base=r42.0 ElementSize=1 Type=ub
329 .declare ubTempG Base=r44.0 ElementSize=1 Type=ub
330 .declare ubTempB Base=r46.0 ElementSize=1 Type=ub
332 // the r17 register (nTEMP0) is originally defined from "Common.inc"
333 // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming
// NOTE(review): the name declared below is wTemp0 (no "SAVE_RGB" suffix) and
// its Type is uw despite the "w" prefix; it is not referenced by the
// conversion code visible in this file - confirm whether it is still needed.
335 .declare wTemp0 Base=r17.0 ElementSize=2 Type=uw
340 // R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255))
341 // G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255))
342 // B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255))
343 // ITU-R conversion (these are the coefficients the code below uses):
344 // R = clip( Y + 1.402*(Cr-128)) // ITU-R
345 // G = clip( Y - 0.344*(Cb-128) - 0.714*(Cr-128))
346 // B = clip( Y + 1.772*(Cb-128))
348 // At the save module we have all 8 address sub-registers available.
349 // So we will use PING-PONG type of scheme to save the data using
350 // pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help
351 // reduce dependency. - rT
353 //wBUFF_CHNL_PTR points to either buffer 0 or buffer 4.
354 //Add appropriate offsets to get pointers for all buffers (1,2,3 or 5).
355 //Offsets are zero for buffer 0 and buffer 4.
356 //Y/U/V is also stored as R/G/B for the internal purpose
// Block 0, row 1: convert 16 samples from YUV to RGB and write the result
// back over the inputs (Y via a0.1 -> R, U via a0.2 -> G, V via a0.0 -> B).
// Address setup for block 0: a0.0-a0.3 are copied from r22.0-r22.3 with no
// extra offset. a0.3 is loaded but not used by this row.
358 mov (4) a0.0<1>:uw r22.0<4;4,1>:uw
359 //the first line in the block 0
// Load: bytes at offsets 1,3,..,15 and 17,19,..,31 from each pointer
// (8 per mov, stride 2), converted into the float Y/U/V buffers.
360 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub
361 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub
362 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub
363 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub
364 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub
365 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub
// Remove the 128 bias from U and V.
367 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
368 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
// R = Y + 1.402*V: the accumulator is preloaded with Y, mac adds the product.
370 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
371 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
// G = Y - 0.344*U - 0.714*V: the U term is accumulated in two SIMD8 halves
// (acc0 for the first 8 samples, acc1 for the last 8), then the V term is
// added and the sum written to fBUFFER_G.
// NOTE(review): the second mac (8) names acc1 as destination without a
// second-half qualifier; confirm that its implicit accumulator source is
// acc1 and not acc0.
373 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
374 mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
375 mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
376 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
// B = Y + 1.772*U.
378 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
379 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
// Clamp (the clip() of the formulas above): saturating multiply by
// 0.0039216 (about 1/255), then multiply by 255. The Y/U/V buffers are
// reused as scratch for the normalized R/G/B values.
381 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
382 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
383 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
385 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
386 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
387 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
// Float -> 16-bit integer, one result per dword (destination stride 2 words).
389 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
390 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
391 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
// Store byte 0 of every dword (source stride 4 bytes) back at offset 1,
// stride 2 - the same byte positions the inputs were loaded from.
393 mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4>
394 mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4>
395 mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4>
// Block 0, row 2: convert 16 samples from YUV to RGB and write the result
// back over the inputs (Y via a0.1 -> R, U via a0.2 -> G, V via a0.0 -> B).
// Uses a0.0-a0.2 as set at the start of block 0; this row starts at byte
// offset 33 from each pointer.
397 //the second line in the block 0
// Load: bytes at offsets 33,35,..,47 and 49,51,..,63 (8 per mov, stride 2),
// converted into the float Y/U/V buffers.
399 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub
400 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub
401 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub
402 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub
403 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub
404 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub
// Remove the 128 bias from U and V.
406 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
407 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
// R = Y + 1.402*V: the accumulator is preloaded with Y, mac adds the product.
409 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
410 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
// G = Y - 0.344*U - 0.714*V: U term accumulated in two SIMD8 halves
// (acc0, acc1), then the V term is added and the sum written to fBUFFER_G.
412 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
413 mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
414 mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
415 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
// B = Y + 1.772*U.
417 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
418 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
// Clamp: saturating multiply by 0.0039216 (about 1/255), then multiply by
// 255. The Y/U/V buffers are reused as scratch for the normalized values.
420 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
421 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
422 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
424 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
425 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
426 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
// Float -> 16-bit integer, one result per dword (destination stride 2 words).
429 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
430 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
431 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
// Store byte 0 of every dword back at offset 33, stride 2 - the same byte
// positions the inputs were loaded from.
432 mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4>
433 mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4>
434 mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4>
// Block 0, row 3: convert 16 samples from YUV to RGB and write the result
// back over the inputs (Y via a0.1 -> R, U via a0.2 -> G, V via a0.0 -> B).
// Uses a0.0-a0.2 as set at the start of block 0; this row starts at byte
// offset 65 from each pointer.
436 //the third line in the block 0
// Load: bytes at offsets 65,67,..,79 and 81,83,..,95 (8 per mov, stride 2),
// converted into the float Y/U/V buffers.
437 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub
438 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub
439 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub
440 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub
441 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub
442 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub
// Remove the 128 bias from U and V.
444 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
445 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
// R = Y + 1.402*V: the accumulator is preloaded with Y, mac adds the product.
447 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
448 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
// G = Y - 0.344*U - 0.714*V: U term accumulated in two SIMD8 halves
// (acc0, acc1), then the V term is added and the sum written to fBUFFER_G.
450 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
451 mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
452 mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
453 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
// B = Y + 1.772*U.
455 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
456 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
// Clamp: saturating multiply by 0.0039216 (about 1/255), then multiply by
// 255. The Y/U/V buffers are reused as scratch for the normalized values.
458 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
459 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
460 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
462 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
463 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
464 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
// Float -> 16-bit integer, one result per dword (destination stride 2 words).
466 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
467 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
468 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
// Store byte 0 of every dword back at offset 65, stride 2 - the same byte
// positions the inputs were loaded from.
469 mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4>
470 mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4>
471 mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4>
// Block 0, row 4: convert 16 samples from YUV to RGB and write the result
// back over the inputs (Y via a0.1 -> R, U via a0.2 -> G, V via a0.0 -> B).
// Uses a0.0-a0.2 as set at the start of block 0; this row starts at byte
// offset 97 from each pointer.
473 //the fourth line in the block 0
// Load: bytes at offsets 97,99,..,111 and 113,115,..,127 (8 per mov,
// stride 2), converted into the float Y/U/V buffers.
474 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub
475 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub
476 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub
477 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub
478 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub
479 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub
// Remove the 128 bias from U and V.
481 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
482 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
// R = Y + 1.402*V: the accumulator is preloaded with Y, mac adds the product.
484 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
485 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
// G = Y - 0.344*U - 0.714*V: U term accumulated in two SIMD8 halves
// (acc0, acc1), then the V term is added and the sum written to fBUFFER_G.
487 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
488 mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
489 mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
490 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
// B = Y + 1.772*U.
492 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
493 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
// Clamp: saturating multiply by 0.0039216 (about 1/255), then multiply by
// 255. The Y/U/V buffers are reused as scratch for the normalized values.
495 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
496 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
497 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
499 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
500 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
501 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
// Float -> 16-bit integer, one result per dword (destination stride 2 words).
503 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
504 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
505 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
// Store byte 0 of every dword back at offset 97, stride 2 - the same byte
// positions the inputs were loaded from.
506 mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4>
507 mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4>
508 mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4>
// Block 1, row 1: convert 16 samples from YUV to RGB and write the result
// back over the inputs (Y via a0.1 -> R, U via a0.2 -> G, V via a0.0 -> B).
// Address setup for block 1: a0.0-a0.3 = r22.0-r22.3 + 512.
// NOTE(review): 512 presumably steps each pointer to the next 16-register
// buffer (cf. fBUFFER_0..3 spaced 16 registers apart) - confirm.
512 add (4) a0.0<1>:uw r22.0<4;4,1>:uw 512:uw
513 //the first line in the block 1
// Load: bytes at offsets 1,3,..,15 and 17,19,..,31 from each pointer
// (8 per mov, stride 2), converted into the float Y/U/V buffers.
514 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub
515 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub
516 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub
517 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub
518 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub
519 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub
// Remove the 128 bias from U and V.
521 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
522 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
// R = Y + 1.402*V: the accumulator is preloaded with Y, mac adds the product.
524 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
525 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
// G = Y - 0.344*U - 0.714*V: U term accumulated in two SIMD8 halves
// (acc0, acc1), then the V term is added and the sum written to fBUFFER_G.
527 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
528 mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
529 mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
530 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
// B = Y + 1.772*U.
532 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
533 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
// Clamp: saturating multiply by 0.0039216 (about 1/255), then multiply by
// 255. The Y/U/V buffers are reused as scratch for the normalized values.
535 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
536 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
537 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
539 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
540 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
541 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
// Float -> 16-bit integer, one result per dword (destination stride 2 words).
543 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
544 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
545 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
// Store byte 0 of every dword back at offset 1, stride 2 - the same byte
// positions the inputs were loaded from.
547 mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4>
548 mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4>
549 mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4>
// Block 1, row 2: convert 16 samples from YUV to RGB and write the result
// back over the inputs (Y via a0.1 -> R, U via a0.2 -> G, V via a0.0 -> B).
// Uses a0.0-a0.2 as set at the start of block 1; this row starts at byte
// offset 33 from each pointer.
551 //the second line in the block 1
// Load: bytes at offsets 33,35,..,47 and 49,51,..,63 (8 per mov, stride 2),
// converted into the float Y/U/V buffers.
553 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub
554 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub
555 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub
556 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub
557 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub
558 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub
// Remove the 128 bias from U and V.
560 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
561 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
// R = Y + 1.402*V: the accumulator is preloaded with Y, mac adds the product.
563 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
564 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
// G = Y - 0.344*U - 0.714*V: U term accumulated in two SIMD8 halves
// (acc0, acc1), then the V term is added and the sum written to fBUFFER_G.
566 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
567 mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
568 mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
569 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
// B = Y + 1.772*U.
571 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
572 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
// Clamp: saturating multiply by 0.0039216 (about 1/255), then multiply by
// 255. The Y/U/V buffers are reused as scratch for the normalized values.
574 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
575 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
576 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
578 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
579 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
580 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
// Float -> 16-bit integer, one result per dword (destination stride 2 words).
583 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
584 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
585 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
// Store byte 0 of every dword back at offset 33, stride 2 - the same byte
// positions the inputs were loaded from.
586 mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4>
587 mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4>
588 mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4>
// Block 1, row 3: convert 16 samples from YUV to RGB and write the result
// back over the inputs (Y via a0.1 -> R, U via a0.2 -> G, V via a0.0 -> B).
// Uses a0.0-a0.2 as set at the start of block 1; this row starts at byte
// offset 65 from each pointer.
590 //the third line in the block 1
// Load: bytes at offsets 65,67,..,79 and 81,83,..,95 (8 per mov, stride 2),
// converted into the float Y/U/V buffers.
591 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub
592 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub
593 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub
594 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub
595 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub
596 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub
// Remove the 128 bias from U and V.
598 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
599 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
// R = Y + 1.402*V: the accumulator is preloaded with Y, mac adds the product.
601 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
602 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
// G = Y - 0.344*U - 0.714*V: U term accumulated in two SIMD8 halves
// (acc0, acc1), then the V term is added and the sum written to fBUFFER_G.
604 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
605 mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
606 mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
607 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
// B = Y + 1.772*U.
609 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
610 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
// Clamp: saturating multiply by 0.0039216 (about 1/255), then multiply by
// 255. The Y/U/V buffers are reused as scratch for the normalized values.
612 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
613 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
614 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
616 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
617 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
618 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
// Float -> 16-bit integer, one result per dword (destination stride 2 words).
620 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
621 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
622 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
// Store byte 0 of every dword back at offset 65, stride 2 - the same byte
// positions the inputs were loaded from.
623 mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4>
624 mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4>
625 mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4>
// Block 1, row 4: convert 16 samples from YUV to RGB and write the result
// back over the inputs (Y via a0.1 -> R, U via a0.2 -> G, V via a0.0 -> B).
// Uses a0.0-a0.2 as set at the start of block 1; this row starts at byte
// offset 97 from each pointer.
627 //the fourth line in the block 1
// Load: bytes at offsets 97,99,..,111 and 113,115,..,127 (8 per mov,
// stride 2), converted into the float Y/U/V buffers.
628 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub
629 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub
630 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub
631 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub
632 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub
633 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub
// Remove the 128 bias from U and V.
635 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
636 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
// R = Y + 1.402*V: the accumulator is preloaded with Y, mac adds the product.
638 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
639 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
// G = Y - 0.344*U - 0.714*V: U term accumulated in two SIMD8 halves
// (acc0, acc1), then the V term is added and the sum written to fBUFFER_G.
641 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
642 mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
643 mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
644 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
// B = Y + 1.772*U.
646 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
647 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
// Clamp: saturating multiply by 0.0039216 (about 1/255), then multiply by
// 255. The Y/U/V buffers are reused as scratch for the normalized values.
649 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
650 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
651 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
653 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
654 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
655 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
// Float -> 16-bit integer, one result per dword (destination stride 2 words).
657 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
658 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
659 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
// Store byte 0 of every dword back at offset 97, stride 2 - the same byte
// positions the inputs were loaded from.
660 mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4>
661 mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4>
662 mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4>
// Block 2, row 1: convert 16 samples from YUV to RGB and write the result
// back over the inputs (Y via a0.1 -> R, U via a0.2 -> G, V via a0.0 -> B).
// Address setup for block 2: a0.0-a0.3 = r22.0-r22.3 + 1024.
// NOTE(review): 1024 presumably selects the third 16-register buffer
// (cf. fBUFFER_0..3 spaced 16 registers apart) - confirm.
666 add (4) a0.0<1>:uw r22.0<4;4,1>:uw 1024:uw
667 //the first line in the block 2
// Load: bytes at offsets 1,3,..,15 and 17,19,..,31 from each pointer
// (8 per mov, stride 2), converted into the float Y/U/V buffers.
668 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub
669 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub
670 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub
671 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub
672 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub
673 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub
// Remove the 128 bias from U and V.
675 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
676 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
// R = Y + 1.402*V: the accumulator is preloaded with Y, mac adds the product.
678 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
679 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
// G = Y - 0.344*U - 0.714*V: U term accumulated in two SIMD8 halves
// (acc0, acc1), then the V term is added and the sum written to fBUFFER_G.
681 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
682 mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
683 mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
684 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
// B = Y + 1.772*U.
686 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
687 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
// Clamp: saturating multiply by 0.0039216 (about 1/255), then multiply by
// 255. The Y/U/V buffers are reused as scratch for the normalized values.
689 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
690 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
691 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
693 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
694 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
695 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
// Float -> 16-bit integer, one result per dword (destination stride 2 words).
697 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
698 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
699 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
// Store byte 0 of every dword back at offset 1, stride 2 - the same byte
// positions the inputs were loaded from.
701 mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4>
702 mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4>
703 mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4>
// Block 2, row 2: convert 16 samples from YUV to RGB and write the result
// back over the inputs (Y via a0.1 -> R, U via a0.2 -> G, V via a0.0 -> B).
// Uses a0.0-a0.2 as set at the start of block 2; this row starts at byte
// offset 33 from each pointer.
705 //the second line in the block 2
// Load: bytes at offsets 33,35,..,47 and 49,51,..,63 (8 per mov, stride 2),
// converted into the float Y/U/V buffers.
707 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub
708 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub
709 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub
710 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub
711 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub
712 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub
// Remove the 128 bias from U and V.
714 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
715 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
// R = Y + 1.402*V: the accumulator is preloaded with Y, mac adds the product.
717 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
718 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
// G = Y - 0.344*U - 0.714*V: U term accumulated in two SIMD8 halves
// (acc0, acc1), then the V term is added and the sum written to fBUFFER_G.
720 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
721 mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
722 mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
723 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
// B = Y + 1.772*U.
725 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
726 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
// Clamp: saturating multiply by 0.0039216 (about 1/255), then multiply by
// 255. The Y/U/V buffers are reused as scratch for the normalized values.
728 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
729 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
730 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
732 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
733 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
734 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
// Float -> 16-bit integer, one result per dword (destination stride 2 words).
737 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
738 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
739 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
// Store byte 0 of every dword back at offset 33, stride 2 - the same byte
// positions the inputs were loaded from.
740 mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4>
741 mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4>
742 mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4>
// Block 2, row 3: convert 16 samples from YUV to RGB and write the result
// back over the inputs (Y via a0.1 -> R, U via a0.2 -> G, V via a0.0 -> B).
// Uses a0.0-a0.2 as set at the start of block 2; this row starts at byte
// offset 65 from each pointer.
744 //the third line in the block 2
// Load: bytes at offsets 65,67,..,79 and 81,83,..,95 (8 per mov, stride 2),
// converted into the float Y/U/V buffers.
745 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub
746 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub
747 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub
748 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub
749 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub
750 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub
// Remove the 128 bias from U and V.
752 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
753 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
// R = Y + 1.402*V: the accumulator is preloaded with Y, mac adds the product.
755 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
756 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
// G = Y - 0.344*U - 0.714*V: U term accumulated in two SIMD8 halves
// (acc0, acc1), then the V term is added and the sum written to fBUFFER_G.
758 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
759 mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
760 mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
761 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
// B = Y + 1.772*U.
763 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
764 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
// Clamp: saturating multiply by 0.0039216 (about 1/255), then multiply by
// 255. The Y/U/V buffers are reused as scratch for the normalized values.
766 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
767 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
768 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
770 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
771 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
772 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
// Float -> 16-bit integer, one result per dword (destination stride 2 words).
774 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
775 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
776 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
// Store byte 0 of every dword back at offset 65, stride 2 - the same byte
// positions the inputs were loaded from.
777 mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4>
778 mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4>
779 mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4>
// Block 2, row 4: convert 16 samples from YUV to RGB and write the result
// back over the inputs (Y via a0.1 -> R, U via a0.2 -> G, V via a0.0 -> B).
// Uses a0.0-a0.2 as set at the start of block 2; this row starts at byte
// offset 97 from each pointer.
781 //the fourth line in the block 2
// Load: bytes at offsets 97,99,..,111 and 113,115,..,127 (8 per mov,
// stride 2), converted into the float Y/U/V buffers.
782 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub
783 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub
784 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub
785 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub
786 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub
787 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub
// Remove the 128 bias from U and V.
789 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
790 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
// R = Y + 1.402*V: the accumulator is preloaded with Y, mac adds the product.
792 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
793 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
// G = Y - 0.344*U - 0.714*V: U term accumulated in two SIMD8 halves
// (acc0, acc1), then the V term is added and the sum written to fBUFFER_G.
795 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
796 mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
797 mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
798 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
// B = Y + 1.772*U.
800 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
801 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
// Clamp: saturating multiply by 0.0039216 (about 1/255), then multiply by
// 255. The Y/U/V buffers are reused as scratch for the normalized values.
803 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
804 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
805 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
807 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
808 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
809 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
// Float -> 16-bit integer, one result per dword (destination stride 2 words).
811 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
812 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
813 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
// Store byte 0 of every dword back at offset 97, stride 2 - the same byte
// positions the inputs were loaded from.
814 mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4>
815 mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4>
816 mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4>
// Block 3, row 1: convert 16 samples from YUV to RGB and write the result
// back over the inputs (Y via a0.1 -> R, U via a0.2 -> G, V via a0.0 -> B).
// Address setup for block 3: a0.0-a0.3 = r22.0-r22.3 + 1536.
// NOTE(review): 1536 presumably selects the fourth 16-register buffer
// (cf. fBUFFER_0..3 spaced 16 registers apart) - confirm.
820 add (4) a0.0<1>:uw r22.0<4;4,1>:uw 1536:uw
821 //the first line in the block 3
// Load: bytes at offsets 1,3,..,15 and 17,19,..,31 from each pointer
// (8 per mov, stride 2), converted into the float Y/U/V buffers.
822 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub
823 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub
824 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub
825 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub
826 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub
827 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub
// Remove the 128 bias from U and V.
829 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
830 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
// R = Y + 1.402*V: the accumulator is preloaded with Y, mac adds the product.
832 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
833 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
// G = Y - 0.344*U - 0.714*V: U term accumulated in two SIMD8 halves
// (acc0, acc1), then the V term is added and the sum written to fBUFFER_G.
835 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
836 mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
837 mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
838 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
// B = Y + 1.772*U.
840 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
841 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
// Clamp: saturating multiply by 0.0039216 (about 1/255), then multiply by
// 255. The Y/U/V buffers are reused as scratch for the normalized values.
843 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
844 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
845 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
847 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
848 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
849 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
// Float -> 16-bit integer, one result per dword (destination stride 2 words).
851 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
852 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
853 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
// Store byte 0 of every dword back at offset 1, stride 2 - the same byte
// positions the inputs were loaded from.
855 mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4>
856 mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4>
857 mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4>
// Block 3, row 2: convert 16 samples from YUV to RGB and write the result
// back over the inputs (Y via a0.1 -> R, U via a0.2 -> G, V via a0.0 -> B).
// Uses a0.0-a0.2 as set at the start of block 3; this row starts at byte
// offset 33 from each pointer.
859 //the second line in the block 3
// Load: bytes at offsets 33,35,..,47 and 49,51,..,63 (8 per mov, stride 2),
// converted into the float Y/U/V buffers.
861 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub
862 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub
863 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub
864 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub
865 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub
866 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub
// Remove the 128 bias from U and V.
868 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
869 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
// R = Y + 1.402*V: the accumulator is preloaded with Y, mac adds the product.
871 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
872 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
// G = Y - 0.344*U - 0.714*V: U term accumulated in two SIMD8 halves
// (acc0, acc1), then the V term is added and the sum written to fBUFFER_G.
874 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
875 mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
876 mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
877 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
// B = Y + 1.772*U.
879 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
880 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
// Clamp: saturating multiply by 0.0039216 (about 1/255), then multiply by
// 255. The Y/U/V buffers are reused as scratch for the normalized values.
882 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
883 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
884 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
886 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
887 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
888 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
// Float -> 16-bit integer, one result per dword (destination stride 2 words).
891 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
892 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
893 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
// Store byte 0 of every dword back at offset 33, stride 2 - the same byte
// positions the inputs were loaded from.
894 mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4>
895 mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4>
896 mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4>
// Block 3, row 3: convert 16 samples from YUV to RGB and write the result
// back over the inputs (Y via a0.1 -> R, U via a0.2 -> G, V via a0.0 -> B).
// Uses a0.0-a0.2 as set at the start of block 3; this row starts at byte
// offset 65 from each pointer.
898 //the third line in the block 3
// Load: bytes at offsets 65,67,..,79 and 81,83,..,95 (8 per mov, stride 2),
// converted into the float Y/U/V buffers.
899 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub
900 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub
901 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub
902 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub
903 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub
904 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub
// Remove the 128 bias from U and V.
906 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
907 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
// R = Y + 1.402*V: the accumulator is preloaded with Y, mac adds the product.
909 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
910 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
// G = Y - 0.344*U - 0.714*V: U term accumulated in two SIMD8 halves
// (acc0, acc1), then the V term is added and the sum written to fBUFFER_G.
912 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
913 mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
914 mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
915 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
// B = Y + 1.772*U.
917 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
918 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
// Clamp: saturating multiply by 0.0039216 (about 1/255), then multiply by
// 255. The Y/U/V buffers are reused as scratch for the normalized values.
920 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
921 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
922 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
924 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
925 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
926 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
// Float -> 16-bit integer, one result per dword (destination stride 2 words).
928 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
929 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
930 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
// Store byte 0 of every dword back at offset 65, stride 2 - the same byte
// positions the inputs were loaded from.
931 mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4>
932 mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4>
933 mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4>
// Block 3, row 4: convert 16 samples from YUV to RGB and write the result
// back over the inputs (Y via a0.1 -> R, U via a0.2 -> G, V via a0.0 -> B).
// Uses a0.0-a0.2 as set at the start of block 3; this row starts at byte
// offset 97 from each pointer.
935 //the fourth line in the block 3
// Load: bytes at offsets 97,99,..,111 and 113,115,..,127 (8 per mov,
// stride 2), converted into the float Y/U/V buffers.
936 mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub
937 mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub
938 mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub
939 mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub
940 mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub
941 mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub
// Remove the 128 bias from U and V.
943 add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f
944 add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f
// R = Y + 1.402*V: the accumulator is preloaded with Y, mac adds the product.
946 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
947 mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f
// G = Y - 0.344*U - 0.714*V: U term accumulated in two SIMD8 halves
// (acc0, acc1), then the V term is added and the sum written to fBUFFER_G.
949 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
950 mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f
951 mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f
952 mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f
// B = Y + 1.772*U.
954 mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1>
955 mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f
// Clamp: saturating multiply by 0.0039216 (about 1/255), then multiply by
// 255. The Y/U/V buffers are reused as scratch for the normalized values.
957 mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f
958 mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f
959 mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f
961 mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f
962 mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f
963 mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f
// Float -> 16-bit integer, one result per dword (destination stride 2 words).
965 mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1>
966 mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1>
967 mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1>
// Store byte 0 of every dword back at offset 97, stride 2 - the same byte
// positions the inputs were loaded from.
968 mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4>
969 mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4>
970 mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4>