Removed build warnings
[platform/adaptation/ap_samsung/libexynos-common.git] / libswconverter / swconvertor.c
1 /*
2  *
3  * Copyright 2012 Samsung Electronics S.LSI Co. LTD
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 /*
19  * @file    swconvertor.c
20  *
21  * @brief   Exynos_OMX specific define
22  *
23  * @author  ShinWon Lee (shinwon.lee@samsung.com)
24  *
25  * @version 1.0
26  *
27  * @history
28  *   2012.02.01 : Create
29  */
30
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include "swconverter.h"
35
36 #ifdef NEON_SUPPORT
37 #ifdef USE_NV12T_128X64
38 /* MFC 5.X */
39 /*
40  * Converts tiled data to linear
41  * Crops left, top, right, buttom
42  * 1. Y of NV12T to Y of YUV420P
43  * 2. Y of NV12T to Y of YUV420S
44  * 3. UV of NV12T to UV of YUV420S
45  *
46  * @param yuv420_dest
47  *   Y or UV plane address of YUV420[out]
48  *
49  * @param nv12t_src
50  *   Y or UV plane address of NV12T[in]
51  *
52  * @param yuv420_width
53  *   Width of YUV420[in]
54  *
55  * @param yuv420_height
56  *   Y: Height of YUV420, UV: Height/2 of YUV420[in]
57  *
58  * @param left
59  *   Crop size of left
60  *
61  * @param top
62  *   Crop size of top
63  *
64  * @param right
65  *   Crop size of right
66  *
67  * @param buttom
68  *   Crop size of buttom
69  */
70  void csc_tiled_to_linear_crop_neon(
71     unsigned char *yuv420_dest,
72     unsigned char *nv12t_src,
73     unsigned int yuv420_width,
74     unsigned int yuv420_height,
75     unsigned int left,
76     unsigned int top,
77     unsigned int right,
78     unsigned int buttom);
79
80 /*
81  * Converts and Deinterleaves tiled data to linear
82  * Crops left, top, right, buttom
83  * 1. UV of NV12T to UV of YUV420P
84  *
85  * @param yuv420_u_dest
86  *   U plane address of YUV420P[out]
87  *
88  * @param yuv420_v_dest
89  *   V plane address of YUV420P[out]
90  *
91  * @param nv12t_uv_src
92  *   UV plane address of NV12T[in]
93  *
94  * @param yuv420_width
95  *   Width of YUV420[in]
96  *
97  * @param yuv420_uv_height
98  *   Height/2 of YUV420[in]
99  *
100  * @param left
101  *   Crop size of left
102  *
103  * @param top
104  *   Crop size of top
105  *
106  * @param right
107  *   Crop size of right
108  *
109  * @param buttom
110  *   Crop size of buttom
111  */
112 void csc_tiled_to_linear_deinterleave_crop_neon(
113     unsigned char *yuv420_u_dest,
114     unsigned char *yuv420_v_dest,
115     unsigned char *nv12t_uv_src,
116     unsigned int yuv420_width,
117     unsigned int yuv420_uv_height,
118     unsigned int left,
119     unsigned int top,
120     unsigned int right,
121     unsigned int buttom);
122
123 /*
124  * Converts linear data to tiled
125  * Crops left, top, right, buttom
126  * 1. Y of YUV420P to Y of NV12T
127  * 2. Y of YUV420S to Y of NV12T
128  * 3. UV of YUV420S to UV of NV12T
129  *
130  * @param nv12t_dest
131  *   Y or UV plane address of NV12T[out]
132  *
133  * @param yuv420_src
134  *   Y or UV plane address of YUV420P(S)[in]
135  *
136  * @param yuv420_width
137  *   Width of YUV420[in]
138  *
139  * @param yuv420_height
140  *   Y: Height of YUV420, UV: Height/2 of YUV420[in]
141  *
142  * @param left
143  *   Crop size of left
144  *
145  * @param top
146  *   Crop size of top
147  *
148  * @param right
149  *   Crop size of right
150  *
151  * @param buttom
152  *   Crop size of buttom
153  */
154 void csc_linear_to_tiled_crop_neon(
155     unsigned char *nv12t_dest,
156     unsigned char *yuv420_src,
157     unsigned int yuv420_width,
158     unsigned int yuv420_height,
159     unsigned int left,
160     unsigned int top,
161     unsigned int right,
162     unsigned int buttom);
163
164 /*
165  * Converts and Interleaves linear to tiled
166  * Crops left, top, right, buttom
167  * 1. UV of YUV420P to UV of NV12T
168  *
169  * @param nv12t_uv_dest
170  *   UV plane address of NV12T[out]
171  *
172  * @param yuv420_u_src
173  *   U plane address of YUV420P[in]
174  *
175  * @param yuv420_v_src
176  *   V plane address of YUV420P[in]
177  *
178  * @param yuv420_width
179  *   Width of YUV420[in]
180  *
181  * @param yuv420_height
182  *   Height/2 of YUV420[in]
183  *
184  * @param left
185  *   Crop size of left
186  *
187  * @param top
188  *   Crop size of top
189  *
190  * @param right
191  *   Crop size of right
192  *
193  * @param buttom
194  *   Crop size of buttom
195  */
196 void csc_linear_to_tiled_interleave_crop_neon(
197     unsigned char *nv12t_uv_dest,
198     unsigned char *yuv420_u_src,
199     unsigned char *yuv420_v_src,
200     unsigned int yuv420_width,
201     unsigned int yuv420_height,
202     unsigned int left,
203     unsigned int top,
204     unsigned int right,
205     unsigned int buttom);
206 #else
207 /* others */
208 void csc_tiled_to_linear_y_neon(
209     unsigned char  *y_dst,
210     unsigned char  *y_src,
211     unsigned int    width,
212     unsigned int    height);
213
214 void csc_tiled_to_linear_uv_neon(
215     unsigned char  *uv_dst,
216     unsigned char  *uv_src,
217     unsigned int    width,
218     unsigned int    height);
219
220 void csc_tiled_to_linear_uv_deinterleave_neon(
221     unsigned char  *u_dst,
222     unsigned char  *v_dst,
223     unsigned char  *uv_src,
224     unsigned int    width,
225     unsigned int    height);
226 #endif /* USE_NV12T_128X64 */
227 /* common */
228 void csc_interleave_memcpy_neon(
229     unsigned char  *dest,
230     unsigned char  *src1,
231     unsigned char  *src2,
232     unsigned int    src_size);
233
234 void csc_BGRA8888_to_YUV420SP_NEON(
235      unsigned char *y_dst,
236      unsigned char *uv_dst,
237      unsigned char *rgb_src,
238      unsigned int   width,
239      unsigned int   height);
240
241 void csc_RGBA8888_to_YUV420SP_NEON(
242     unsigned char  *y_dst,
243     unsigned char  *uv_dst,
244     unsigned char  *rgb_src,
245     unsigned int    width,
246     unsigned int    height);
247 #endif /* NEON_SUPPORT */
248
249
250 #ifdef USE_NV12T_128X64
/*
 * It supports MFC 5.x tiled.
 * Get the tiled byte offset of the 4-pixel group containing position (x,y).
 *
 * The returned offset is always a multiple of 4: x_pos is reduced to a
 * 4-pixel group index (x_pos >> 2) before the address is built, so a caller
 * that needs byte granularity has to add (x_pos & 0x3) itself.
 *
 * Layout of the returned value:
 *   bits  2..10 : (y_pos & 0x1f) << 4 | (x_addr & 0xf), shifted left by 2
 *   bits 11..12 : bank selected from bits 4 and 5 of x_addr and bit 5 of y_pos
 *   bits 13..   : tile-group index derived from y_pos >> 6, x_addr and the
 *                 width rounded up to 128-pixel units
 *
 * @param x_size
 *   width of tiled[in]
 *
 * @param y_size
 *   height of tiled[in]
 *
 * @param x_pos
 *   x position of tiled[in]
 *
 * @param y_pos
 *   y position of tiled[in]
 *
 * @return
 *   address of tiled data
 */
static int tile_4x2_read(int x_size, int y_size, int x_pos, int y_pos)
{
    int pixel_x_m1 = x_size - 1;
    int pixel_y_m1 = y_size - 1;
    /* number of 128-pixel wide units needed to cover x_size */
    int roundup_x = (pixel_x_m1 >> 7) + 1;
    /* index of the 4-pixel group that holds x_pos */
    int x_addr = x_pos >> 2;
    int linear_addr0, linear_addr1, bank_addr;
    int last_even_row;

    /*
     * True when y_pos lies in the final 32-line row of the picture and that
     * row has an even index; only the column term of linear_addr1 differs
     * in that case.
     */
    last_even_row = (y_size <= y_pos + 32) && (y_pos < y_size) &&
                    (((pixel_y_m1 >> 5) & 0x1) == 0) &&
                    (((y_pos >> 5) & 0x1) == 0);

    linear_addr0 = ((y_pos & 0x1f) << 4) | (x_addr & 0xf);

    if (last_even_row)
        linear_addr1 = ((y_pos >> 6) & 0xff) * roundup_x + ((x_addr >> 6) & 0x3f);
    else
        linear_addr1 = ((y_pos >> 6) & 0xff) * roundup_x + ((x_addr >> 5) & 0x7f);

    /* bit 1 of the bank is set when bit 5 of x_addr and bit 5 of y_pos differ */
    if (((x_addr >> 5) & 0x1) == ((y_pos >> 5) & 0x1))
        bank_addr = ((x_addr >> 4) & 0x1);
    else
        bank_addr = 0x2 | ((x_addr >> 4) & 0x1);

    return (linear_addr1 << 13) | (bank_addr << 11) | (linear_addr0 << 2);
}
310
/*
 * It supports MFC 5.x tiled.
 * Converts tiled data to linear
 * Crops left, top, right, buttom
 * 1. Y of NV12T to Y of YUV420P
 * 2. Y of NV12T to Y of YUV420S
 * 3. UV of NV12T to UV of YUV420S
 *
 * The destination is written densely: each output row is
 * (yuv420_width - left - right) bytes long and rows are stored back to back.
 * Nothing is written when the crop rectangle does not fit inside the source
 * plane.
 *
 * @param yuv420_dest
 *   Y or UV plane address of YUV420[out]
 *
 * @param nv12t_src
 *   Y or UV plane address of NV12T[in]
 *
 * @param yuv420_width
 *   Width of YUV420[in]
 *
 * @param yuv420_height
 *   Y: Height of YUV420, UV: Height/2 of YUV420[in]
 *
 * @param left
 *   Crop size of left
 *
 * @param top
 *   Crop size of top
 *
 * @param right
 *   Crop size of right
 *
 * @param buttom
 *   Crop size of buttom
 */
static void csc_tiled_to_linear_crop(
    unsigned char *yuv420_dest,
    unsigned char *nv12t_src,
    unsigned int yuv420_width,
    unsigned int yuv420_height,
    unsigned int left,
    unsigned int top,
    unsigned int right,
    unsigned int buttom)
{
    unsigned int i, j;
    unsigned int tiled_offset = 0, tiled_offset1 = 0;
    unsigned int linear_offset = 0;
    unsigned int temp1 = 0, temp2 = 0, temp3 = 0, temp4 = 0;

    /*
     * Reject crop rectangles larger than the plane. All arithmetic below is
     * unsigned, so an oversized crop would wrap around and send memcpy far
     * outside both buffers.
     */
    if (right > yuv420_width || left > yuv420_width-right ||
        buttom > yuv420_height || top > yuv420_height-buttom)
        return;

    temp3 = yuv420_width-right;     /* first column past the cropped area */
    temp1 = temp3-left;             /* cropped width */
    /* real width is greater than or equal 256 */
    if (temp1 >= 256) {
        /*
         * Each row is copied in three phases: the partial 256-column group
         * containing `left`, every complete 256-column group, and the
         * partial group that ends at the right crop edge. A 256-column
         * group is read as four 64-byte pieces located at tiled_offset,
         * tiled_offset+2048, tiled_offset1 and tiled_offset1+2048.
         */
        for (i=top; i<yuv420_height-buttom; i=i+1) {
            j = left;
            /* index of the 64-column tile that starts the group holding left */
            temp3 = (j>>8)<<8;
            temp3 = temp3>>6;
            /* index of the 32-line tile row holding line i */
            temp4 = i>>5;
            if (temp4 & 0x1) {
                /* odd formula: 2+x+(x>>2)<<2+x_block_num*(y-1) */
                tiled_offset = temp4-1;
                temp1 = ((yuv420_width+127)>>7)<<7;
                tiled_offset = tiled_offset*(temp1>>6);
                tiled_offset = tiled_offset+temp3;
                tiled_offset = tiled_offset+2;
                temp1 = (temp3>>2)<<2;
                tiled_offset = tiled_offset+temp1;
                tiled_offset = tiled_offset<<11;
                tiled_offset1 = tiled_offset+2048*2;
                temp4 = 8;
            } else {
                temp2 = ((yuv420_height+31)>>5)<<5;
                if ((i+32)<temp2) {
                    /* even1 formula: x+((x+2)>>2)<<2+x_block_num*y */
                    temp1 = temp3+2;
                    temp1 = (temp1>>2)<<2;
                    tiled_offset = temp3+temp1;
                    temp1 = ((yuv420_width+127)>>7)<<7;
                    tiled_offset = tiled_offset+temp4*(temp1>>6);
                    tiled_offset = tiled_offset<<11;
                    tiled_offset1 = tiled_offset+2048*6;
                    temp4 = 8;
                } else {
                    /* even2 formula: x+x_block_num*y */
                    temp1 = ((yuv420_width+127)>>7)<<7;
                    tiled_offset = temp4*(temp1>>6);
                    tiled_offset = tiled_offset+temp3;
                    tiled_offset = tiled_offset<<11;
                    tiled_offset1 = tiled_offset+2048*2;
                    temp4 = 4;
                }
            }
            /* from here on temp4 is the tile stride between 256-column groups */

            /* move to line (i & 0x1F) inside the tile, 64 bytes per line */
            temp1 = i&0x1F;
            tiled_offset = tiled_offset+64*(temp1);
            tiled_offset1 = tiled_offset1+64*(temp1);
            temp2 = yuv420_width-left-right;
            linear_offset = temp2*(i-top);
            /* columns between left and the next 256-column boundary */
            temp3 = ((j+256)>>8)<<8;
            temp3 = temp3-j;
            /* offset of left inside its 64-column tile */
            temp1 = left&0x3F;
            if (temp3 > 192) {
                memcpy(yuv420_dest+linear_offset, nv12t_src+tiled_offset+temp1, 64-temp1);
                memcpy(yuv420_dest+linear_offset+64-temp1, nv12t_src+tiled_offset+2048, 64);
                memcpy(yuv420_dest+linear_offset+128-temp1, nv12t_src+tiled_offset1, 64);
                memcpy(yuv420_dest+linear_offset+192-temp1, nv12t_src+tiled_offset1+2048, 64);
                linear_offset = linear_offset+256-temp1;
            } else if (temp3 > 128) {
                memcpy(yuv420_dest+linear_offset, nv12t_src+tiled_offset+2048+temp1, 64-temp1);
                memcpy(yuv420_dest+linear_offset+64-temp1, nv12t_src+tiled_offset1, 64);
                memcpy(yuv420_dest+linear_offset+128-temp1, nv12t_src+tiled_offset1+2048, 64);
                linear_offset = linear_offset+192-temp1;
            } else if (temp3 > 64) {
                memcpy(yuv420_dest+linear_offset, nv12t_src+tiled_offset1+temp1, 64-temp1);
                memcpy(yuv420_dest+linear_offset+64-temp1, nv12t_src+tiled_offset1+2048, 64);
                linear_offset = linear_offset+128-temp1;
            } else if (temp3 > 0) {
                memcpy(yuv420_dest+linear_offset, nv12t_src+tiled_offset1+2048+temp1, 64-temp1);
                linear_offset = linear_offset+64-temp1;
            }

            /* complete 256-column groups */
            tiled_offset = tiled_offset+temp4*2048;
            j = (left>>8)<<8;
            j = j + 256;
            temp2 = yuv420_width-right-256;
            for (; j<=temp2; j=j+256) {
                memcpy(yuv420_dest+linear_offset, nv12t_src+tiled_offset, 64);
                tiled_offset1 = tiled_offset1+temp4*2048;
                memcpy(yuv420_dest+linear_offset+64, nv12t_src+tiled_offset+2048, 64);
                memcpy(yuv420_dest+linear_offset+128, nv12t_src+tiled_offset1, 64);
                tiled_offset = tiled_offset+temp4*2048;
                memcpy(yuv420_dest+linear_offset+192, nv12t_src+tiled_offset1+2048, 64);
                linear_offset = linear_offset+256;
            }

            /* remaining columns up to the right crop edge */
            tiled_offset1 = tiled_offset1+temp4*2048;
            temp2 = yuv420_width-right-j;
            if (temp2 > 192) {
                memcpy(yuv420_dest+linear_offset, nv12t_src+tiled_offset, 64);
                memcpy(yuv420_dest+linear_offset+64, nv12t_src+tiled_offset+2048, 64);
                memcpy(yuv420_dest+linear_offset+128, nv12t_src+tiled_offset1, 64);
                memcpy(yuv420_dest+linear_offset+192, nv12t_src+tiled_offset1+2048, temp2-192);
            } else if (temp2 > 128) {
                memcpy(yuv420_dest+linear_offset, nv12t_src+tiled_offset, 64);
                memcpy(yuv420_dest+linear_offset+64, nv12t_src+tiled_offset+2048, 64);
                memcpy(yuv420_dest+linear_offset+128, nv12t_src+tiled_offset1, temp2-128);
            } else if (temp2 > 64) {
                memcpy(yuv420_dest+linear_offset, nv12t_src+tiled_offset, 64);
                memcpy(yuv420_dest+linear_offset+64, nv12t_src+tiled_offset+2048, temp2-64);
            } else {
                memcpy(yuv420_dest+linear_offset, nv12t_src+tiled_offset, temp2);
            }
        }
    } else if (temp1 >= 64) {
        /*
         * Cropped width in [64, 256): per row, copy the head up to the next
         * 64-column boundary, at most two whole 64-byte pieces, then the
         * tail. temp1 (cropped width) and temp3 (right edge) keep the
         * values computed before the branch.
         */
        for (i=top; i<(yuv420_height-buttom); i=i+1) {
            j = left;
            tiled_offset = tile_4x2_read(yuv420_width, yuv420_height, j, i);
            temp2 = ((j+64)>>6)<<6;
            temp2 = temp2-j;
            linear_offset = temp1*(i-top);
            /* tile_4x2_read returns a 4-byte aligned offset; add the remainder */
            temp4 = j&0x3;
            tiled_offset = tiled_offset+temp4;
            memcpy(yuv420_dest+linear_offset, nv12t_src+tiled_offset, temp2);
            linear_offset = linear_offset+temp2;
            j = j+temp2;
            if ((j+64) <= temp3) {
                tiled_offset = tile_4x2_read(yuv420_width, yuv420_height, j, i);
                memcpy(yuv420_dest+linear_offset, nv12t_src+tiled_offset, 64);
                linear_offset = linear_offset+64;
                j = j+64;
            }
            if ((j+64) <= temp3) {
                tiled_offset = tile_4x2_read(yuv420_width, yuv420_height, j, i);
                memcpy(yuv420_dest+linear_offset, nv12t_src+tiled_offset, 64);
                linear_offset = linear_offset+64;
                j = j+64;
            }
            if (j < temp3) {
                tiled_offset = tile_4x2_read(yuv420_width, yuv420_height, j, i);
                temp2 = temp3-j;
                memcpy(yuv420_dest+linear_offset, nv12t_src+tiled_offset, temp2);
            }
        }
    } else {
        /* cropped width below 64: look up and copy two bytes at a time */
        for (i=top; i<(yuv420_height-buttom); i=i+1) {
            linear_offset = temp1*(i-top);
            for (j=left; j<(yuv420_width-right); j=j+2) {
                tiled_offset = tile_4x2_read(yuv420_width, yuv420_height, j, i);
                temp4 = j&0x3;
                tiled_offset = tiled_offset+temp4;
                memcpy(yuv420_dest+linear_offset, nv12t_src+tiled_offset, 2);
                linear_offset = linear_offset+2;
            }
        }
    }
}
510
/*
 * Converts and Deinterleaves tiled data to linear
 * Crops left, top, right, buttom
 * 1. UV of NV12T to UV of YUV420P
 *
 * For every source span of N interleaved bytes the U and V destinations
 * each advance by N/2, so one output row occupies
 * (yuv420_width - left - right) / 2 bytes in each plane.
 * Nothing is written when the crop rectangle does not fit inside the source
 * plane.
 *
 * @param yuv420_u_dest
 *   U plane address of YUV420P[out]
 *
 * @param yuv420_v_dest
 *   V plane address of YUV420P[out]
 *
 * @param nv12t_uv_src
 *   UV plane address of NV12T[in]
 *
 * @param yuv420_width
 *   Width of YUV420[in]
 *
 * @param yuv420_uv_height
 *   Height/2 of YUV420[in]
 *
 * @param left
 *   Crop size of left
 *
 * @param top
 *   Crop size of top
 *
 * @param right
 *   Crop size of right
 *
 * @param buttom
 *   Crop size of buttom
 */
static void csc_tiled_to_linear_deinterleave_crop(
    unsigned char *yuv420_u_dest,
    unsigned char *yuv420_v_dest,
    unsigned char *nv12t_uv_src,
    unsigned int yuv420_width,
    unsigned int yuv420_uv_height,
    unsigned int left,
    unsigned int top,
    unsigned int right,
    unsigned int buttom)
{
    unsigned int i, j;
    unsigned int tiled_offset = 0, tiled_offset1 = 0;
    unsigned int linear_offset = 0;
    unsigned int temp1 = 0, temp2 = 0, temp3 = 0, temp4 = 0;

    /*
     * Reject crop rectangles larger than the plane. All arithmetic below is
     * unsigned, so an oversized crop would wrap around and produce offsets
     * far outside the buffers.
     */
    if (right > yuv420_width || left > yuv420_width-right ||
        buttom > yuv420_uv_height || top > yuv420_uv_height-buttom)
        return;

    /*
     * NOTE(review): csc_deinterleave_memcpy is declared outside this file
     * section; from the way the destinations advance it presumably splits
     * src_size interleaved bytes into src_size/2 U bytes and src_size/2
     * V bytes -- confirm against its definition.
     */
    temp3 = yuv420_width-right;     /* first column past the cropped area */
    temp1 = temp3-left;             /* cropped width */
    /* real width is greater than or equal 256 */
    if (temp1 >= 256) {
        /*
         * Each row is handled in three phases: the partial 256-column group
         * containing `left`, every complete 256-column group, and the
         * partial group that ends at the right crop edge. A 256-column
         * group is read as four 64-byte pieces located at tiled_offset,
         * tiled_offset+2048, tiled_offset1 and tiled_offset1+2048.
         */
        for (i=top; i<yuv420_uv_height-buttom; i=i+1) {
            j = left;
            /* index of the 64-column tile that starts the group holding left */
            temp3 = (j>>8)<<8;
            temp3 = temp3>>6;
            /* index of the 32-line tile row holding line i */
            temp4 = i>>5;
            if (temp4 & 0x1) {
                /* odd formula: 2+x+(x>>2)<<2+x_block_num*(y-1) */
                tiled_offset = temp4-1;
                temp1 = ((yuv420_width+127)>>7)<<7;
                tiled_offset = tiled_offset*(temp1>>6);
                tiled_offset = tiled_offset+temp3;
                tiled_offset = tiled_offset+2;
                temp1 = (temp3>>2)<<2;
                tiled_offset = tiled_offset+temp1;
                tiled_offset = tiled_offset<<11;
                tiled_offset1 = tiled_offset+2048*2;
                temp4 = 8;
            } else {
                temp2 = ((yuv420_uv_height+31)>>5)<<5;
                if ((i+32)<temp2) {
                    /* even1 formula: x+((x+2)>>2)<<2+x_block_num*y */
                    temp1 = temp3+2;
                    temp1 = (temp1>>2)<<2;
                    tiled_offset = temp3+temp1;
                    temp1 = ((yuv420_width+127)>>7)<<7;
                    tiled_offset = tiled_offset+temp4*(temp1>>6);
                    tiled_offset = tiled_offset<<11;
                    tiled_offset1 = tiled_offset+2048*6;
                    temp4 = 8;
                } else {
                    /* even2 formula: x+x_block_num*y */
                    temp1 = ((yuv420_width+127)>>7)<<7;
                    tiled_offset = temp4*(temp1>>6);
                    tiled_offset = tiled_offset+temp3;
                    tiled_offset = tiled_offset<<11;
                    tiled_offset1 = tiled_offset+2048*2;
                    temp4 = 4;
                }
            }
            /* from here on temp4 is the tile stride between 256-column groups */

            /* move to line (i & 0x1F) inside the tile, 64 bytes per line */
            temp1 = i&0x1F;
            tiled_offset = tiled_offset+64*(temp1);
            tiled_offset1 = tiled_offset1+64*(temp1);
            temp2 = yuv420_width-left-right;
            linear_offset = temp2*(i-top)/2;
            /* columns between left and the next 256-column boundary */
            temp3 = ((j+256)>>8)<<8;
            temp3 = temp3-j;
            /* offset of left inside its 64-column tile */
            temp1 = left&0x3F;
            if (temp3 > 192) {
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset, yuv420_v_dest+linear_offset, nv12t_uv_src+tiled_offset+temp1, 64-temp1);
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset+(32-temp1/2),
                                        yuv420_v_dest+linear_offset+(32-temp1/2),
                                        nv12t_uv_src+tiled_offset+2048, 64);
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset+(64-temp1/2),
                                        yuv420_v_dest+linear_offset+(64-temp1/2),
                                        nv12t_uv_src+tiled_offset1, 64);
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset+(96-temp1/2),
                                        yuv420_v_dest+linear_offset+(96-temp1/2),
                                        nv12t_uv_src+tiled_offset1+2048, 64);
                linear_offset = linear_offset+128-temp1/2;
            } else if (temp3 > 128) {
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset,
                                        yuv420_v_dest+linear_offset,
                                        nv12t_uv_src+tiled_offset+2048+temp1, 64-temp1);
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset+(32-temp1/2),
                                        yuv420_v_dest+linear_offset+(32-temp1/2),
                                        nv12t_uv_src+tiled_offset1, 64);
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset+(64-temp1/2),
                                        yuv420_v_dest+linear_offset+(64-temp1/2),
                                        nv12t_uv_src+tiled_offset1+2048, 64);
                linear_offset = linear_offset+96-temp1/2;
            } else if (temp3 > 64) {
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset,
                                        yuv420_v_dest+linear_offset,
                                        nv12t_uv_src+tiled_offset1+temp1, 64-temp1);
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset+(32-temp1/2),
                                        yuv420_v_dest+linear_offset+(32-temp1/2),
                                        nv12t_uv_src+tiled_offset1+2048, 64);
                linear_offset = linear_offset+64-temp1/2;
            } else if (temp3 > 0) {
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset,
                                        yuv420_v_dest+linear_offset,
                                        nv12t_uv_src+tiled_offset1+2048+temp1, 64-temp1);
                linear_offset = linear_offset+32-temp1/2;
            }

            /* complete 256-column groups: 128 bytes per plane each */
            tiled_offset = tiled_offset+temp4*2048;
            j = (left>>8)<<8;
            j = j + 256;
            temp2 = yuv420_width-right-256;
            for (; j<=temp2; j=j+256) {
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset,
                                        yuv420_v_dest+linear_offset,
                                        nv12t_uv_src+tiled_offset, 64);
                tiled_offset1 = tiled_offset1+temp4*2048;
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset+32,
                                        yuv420_v_dest+linear_offset+32,
                                        nv12t_uv_src+tiled_offset+2048, 64);
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset+64,
                                        yuv420_v_dest+linear_offset+64,
                                        nv12t_uv_src+tiled_offset1, 64);
                tiled_offset = tiled_offset+temp4*2048;
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset+96,
                                        yuv420_v_dest+linear_offset+96,
                                        nv12t_uv_src+tiled_offset1+2048, 64);
                linear_offset = linear_offset+128;
            }

            /* remaining columns up to the right crop edge */
            tiled_offset1 = tiled_offset1+temp4*2048;
            temp2 = yuv420_width-right-j;
            if (temp2 > 192) {
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset,
                                        yuv420_v_dest+linear_offset,
                                        nv12t_uv_src+tiled_offset, 64);
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset+32,
                                        yuv420_v_dest+linear_offset+32,
                                        nv12t_uv_src+tiled_offset+2048, 64);
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset+64,
                                        yuv420_v_dest+linear_offset+64,
                                        nv12t_uv_src+tiled_offset1, 64);
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset+96,
                                        yuv420_v_dest+linear_offset+96,
                                        nv12t_uv_src+tiled_offset1+2048, temp2-192);
            } else if (temp2 > 128) {
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset,
                                        yuv420_v_dest+linear_offset,
                                        nv12t_uv_src+tiled_offset, 64);
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset+32,
                                        yuv420_v_dest+linear_offset+32,
                                        nv12t_uv_src+tiled_offset+2048, 64);
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset+64,
                                        yuv420_v_dest+linear_offset+64,
                                        nv12t_uv_src+tiled_offset1, temp2-128);
            } else if (temp2 > 64) {
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset,
                                        yuv420_v_dest+linear_offset,
                                        nv12t_uv_src+tiled_offset, 64);
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset+32,
                                        yuv420_v_dest+linear_offset+32,
                                        nv12t_uv_src+tiled_offset+2048, temp2-64);
            } else {
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset,
                                        yuv420_v_dest+linear_offset,
                                        nv12t_uv_src+tiled_offset, temp2);
            }
        }
    } else if (temp1 >= 64) {
        /*
         * Cropped width in [64, 256): per row, handle the head up to the
         * next 64-column boundary, at most two whole 64-byte pieces, then
         * the tail.
         */
        for (i=top; i<(yuv420_uv_height-buttom); i=i+1) {
            j = left;
            tiled_offset = tile_4x2_read(yuv420_width, yuv420_uv_height, j, i);
            temp2 = ((j+64)>>6)<<6;
            temp2 = temp2-j;
            temp3 = yuv420_width-right;
            temp4 = temp3-left;
            linear_offset = temp4*(i-top)/2;
            /* tile_4x2_read returns a 4-byte aligned offset; add the remainder */
            temp4 = j&0x3;
            tiled_offset = tiled_offset+temp4;
            csc_deinterleave_memcpy(yuv420_u_dest+linear_offset,
                                    yuv420_v_dest+linear_offset,
                                    nv12t_uv_src+tiled_offset, temp2);
            linear_offset = linear_offset+temp2/2;
            j = j+temp2;
            if ((j+64) <= temp3) {
                tiled_offset = tile_4x2_read(yuv420_width, yuv420_uv_height, j, i);
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset,
                                        yuv420_v_dest+linear_offset,
                                        nv12t_uv_src+tiled_offset, 64);
                linear_offset = linear_offset+32;
                j = j+64;
            }
            if ((j+64) <= temp3) {
                tiled_offset = tile_4x2_read(yuv420_width, yuv420_uv_height, j, i);
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset,
                                        yuv420_v_dest+linear_offset,
                                        nv12t_uv_src+tiled_offset, 64);
                linear_offset = linear_offset+32;
                j = j+64;
            }
            if (j < temp3) {
                tiled_offset = tile_4x2_read(yuv420_width, yuv420_uv_height, j, i);
                temp1 = temp3-j;
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset,
                                        yuv420_v_dest+linear_offset,
                                        nv12t_uv_src+tiled_offset, temp1);
            }
        }
    } else {
        /* cropped width below 64: look up and split one 2-byte pair at a time */
        for (i=top; i<(yuv420_uv_height-buttom); i=i+1) {
            temp3 = yuv420_width-right;
            temp4 = temp3-left;
            linear_offset = temp4*(i-top)/2;
            for (j=left; j<(yuv420_width-right); j=j+2) {
                tiled_offset = tile_4x2_read(yuv420_width, yuv420_uv_height, j, i);
                temp3 = j&0x3;
                tiled_offset = tiled_offset+temp3;
                csc_deinterleave_memcpy(yuv420_u_dest+linear_offset,
                                        yuv420_v_dest+linear_offset,
                                        nv12t_uv_src+tiled_offset, 2);
                linear_offset = linear_offset+1;
            }
        }
    }
}
766
/*
 * Computes the byte offset of one 64x32 tile inside an NV12T plane
 * (private helper of csc_linear_to_tiled_crop).
 *
 * @param tile_x
 *   Tile column index, i.e. x/64 of the cropped image[in]
 *
 * @param row
 *   Row of the cropped image; the tile row index is row/32[in]
 *
 * @param cropped_width
 *   Width of the cropped image[in]
 *
 * @param cropped_height
 *   Height of the cropped image[in]
 *
 * @return
 *   Tile number shifted left by 11, i.e. multiplied by the 2048 bytes
 *   (64x32) occupied by one tile
 */
static unsigned int csc_linear_to_tiled_tile_offset(
    unsigned int tile_x,
    unsigned int row,
    unsigned int cropped_width,
    unsigned int cropped_height)
{
    const unsigned int tile_y = row>>5;
    /* number of tiles per tile row: width rounded up to 128, divided by 64 */
    const unsigned int x_block_num = (((cropped_width+127)>>7)<<7)>>6;
    unsigned int tile_number;

    if (tile_y & 0x1) {
        /* odd formula: 2+x+((x>>2)<<2)+x_block_num*(y-1) */
        tile_number = (tile_y-1)*x_block_num + tile_x + 2 + ((tile_x>>2)<<2);
    } else if ((row+32) < (((cropped_height+31)>>5)<<5)) {
        /* even1 formula (another tile row follows): x+(((x+2)>>2)<<2)+x_block_num*y */
        tile_number = tile_x + (((tile_x+2)>>2)<<2) + tile_y*x_block_num;
    } else {
        /* even2 formula (last tile row): x+x_block_num*y */
        tile_number = tile_y*x_block_num + tile_x;
    }

    return tile_number<<11;
}

/*
 * Converts linear data to tiled
 * Crops left, top, right, bottom
 * 1. Y of YUV420P to Y of NV12T
 * 2. Y of YUV420S to Y of NV12T
 * 3. UV of YUV420S to UV of NV12T
 *
 * The cropped image is copied in three passes:
 *   - complete 64x32 tiles, one 64 byte row at a time
 *   - the rows below the last complete tile row, two rows per step
 *   - the columns right of the last complete tile column, in 2x2 byte steps
 * Because the last two passes always copy pairs, the cropped width and
 * height are presumably expected to be even - TODO confirm with callers.
 *
 * @param nv12t_dest
 *   Y or UV plane address of NV12T[out]
 *
 * @param yuv420_src
 *   Y or UV plane address of YUV420P(S)[in]
 *
 * @param yuv420_width
 *   Width of YUV420[in]; also used as the source row stride
 *
 * @param yuv420_height
 *   Y: Height of YUV420, UV: Height/2 of YUV420[in]
 *
 * @param left
 *   Crop size of left
 *
 * @param top
 *   Crop size of top
 *
 * @param right
 *   Crop size of right
 *
 * @param buttom
 *   Crop size of bottom
 */
static void csc_linear_to_tiled_crop(
    unsigned char *nv12t_dest,
    unsigned char *yuv420_src,
    unsigned int yuv420_width,
    unsigned int yuv420_height,
    unsigned int left,
    unsigned int top,
    unsigned int right,
    unsigned int buttom)
{
    const unsigned int cropped_width = yuv420_width-left-right;
    const unsigned int cropped_height = yuv420_height-top-buttom;
    /* largest part of the cropped image that consists of whole tiles */
    const unsigned int aligned_x_size = (cropped_width>>6)<<6;
    const unsigned int aligned_y_size = (cropped_height>>5)<<5;
    unsigned int i, j, k;
    unsigned int tiled_offset;
    unsigned int tile_row, tile_col;

    /* pass 1: complete 64x32 tiles */
    for (i=0; i<aligned_y_size; i=i+32) {
        for (j=0; j<aligned_x_size; j=j+64) {
            tiled_offset = csc_linear_to_tiled_tile_offset(j>>6, i, cropped_width, cropped_height);
            for (k=0; k<32; k++) {
                memcpy(nv12t_dest+tiled_offset+64*k, yuv420_src+left+j+yuv420_width*(i+top+k), 64);
            }
        }
    }

    /* pass 2: rows below the last complete tile row, two full 64 byte rows per step */
    for (i=aligned_y_size; i<cropped_height; i=i+2) {
        for (j=0; j<aligned_x_size; j=j+64) {
            tiled_offset = csc_linear_to_tiled_tile_offset(j>>6, i, cropped_width, cropped_height);
            tile_row = i&0x1F;
            memcpy(nv12t_dest+tiled_offset+64*(tile_row), yuv420_src+left+j+yuv420_width*(i+top), 64);
            memcpy(nv12t_dest+tiled_offset+64*(tile_row+1), yuv420_src+left+j+yuv420_width*(i+top+1), 64);
        }
    }

    /* pass 3: columns right of the last complete tile column, 2x2 bytes per step */
    for (i=0; i<cropped_height; i=i+2) {
        for (j=aligned_x_size; j<cropped_width; j=j+2) {
            tiled_offset = csc_linear_to_tiled_tile_offset(j>>6, i, cropped_width, cropped_height);
            tile_row = i&0x1F;
            tile_col = j&0x3F;
            memcpy(nv12t_dest+tiled_offset+tile_col+64*(tile_row), yuv420_src+left+j+yuv420_width*(i+top), 2);
            memcpy(nv12t_dest+tiled_offset+tile_col+64*(tile_row+1), yuv420_src+left+j+yuv420_width*(i+top+1), 2);
        }
    }
}
967
/*
 * Computes the byte offset of one 64x32 tile inside the NV12T UV plane
 * (private helper of csc_linear_to_tiled_interleave_crop).
 *
 * @param tile_x
 *   Tile column index, i.e. x/64 of the cropped image[in]
 *
 * @param row
 *   Row of the cropped image; the tile row index is row/32[in]
 *
 * @param cropped_width
 *   Width of the cropped image[in]
 *
 * @param cropped_height
 *   Height of the cropped image[in]
 *
 * @return
 *   Tile number shifted left by 11, i.e. multiplied by the 2048 bytes
 *   (64x32) occupied by one tile
 */
static unsigned int csc_interleave_tile_offset(
    unsigned int tile_x,
    unsigned int row,
    unsigned int cropped_width,
    unsigned int cropped_height)
{
    const unsigned int tile_y = row>>5;
    /* number of tiles per tile row: width rounded up to 128, divided by 64 */
    const unsigned int x_block_num = (((cropped_width+127)>>7)<<7)>>6;
    unsigned int tile_number;

    if (tile_y & 0x1) {
        /* odd formula: 2+x+((x>>2)<<2)+x_block_num*(y-1) */
        tile_number = (tile_y-1)*x_block_num + tile_x + 2 + ((tile_x>>2)<<2);
    } else if ((row+32) < (((cropped_height+31)>>5)<<5)) {
        /* even1 formula (another tile row follows): x+(((x+2)>>2)<<2)+x_block_num*y */
        tile_number = tile_x + (((tile_x+2)>>2)<<2) + tile_y*x_block_num;
    } else {
        /* even2 formula (last tile row): x+x_block_num*y */
        tile_number = tile_y*x_block_num + tile_x;
    }

    return tile_number<<11;
}

/*
 * Converts and Interleaves linear to tiled
 * Crops left, top, right, bottom
 * 1. UV of YUV420P to UV of NV12T
 *
 * Every destination row of 64 bytes is built from 32 U bytes and 32 V bytes
 * through csc_interleave_memcpy. The U and V source rows are addressed with
 * a stride of yuv420_width/2. The cropped image is processed in three passes:
 *   - complete 64x32 tiles, one row at a time
 *   - the rows below the last complete tile row, one full row per step
 *   - the columns right of the last complete tile column, one U/V pair
 *     (2 destination bytes) per step
 *
 * @param nv12t_uv_dest
 *   UV plane address of NV12T[out]
 *
 * @param yuv420_u_src
 *   U plane address of YUV420P[in]
 *
 * @param yuv420_v_src
 *   V plane address of YUV420P[in]
 *
 * @param yuv420_width
 *   Width of YUV420[in]
 *
 * @param yuv420_height
 *   Height/2 of YUV420[in]
 *
 * @param left
 *   Crop size of left
 *
 * @param top
 *   Crop size of top
 *
 * @param right
 *   Crop size of right
 *
 * @param buttom
 *   Crop size of bottom
 */
static void csc_linear_to_tiled_interleave_crop(
    unsigned char *nv12t_uv_dest,
    unsigned char *yuv420_u_src,
    unsigned char *yuv420_v_src,
    unsigned int yuv420_width,
    unsigned int yuv420_height,
    unsigned int left,
    unsigned int top,
    unsigned int right,
    unsigned int buttom)
{
    const unsigned int cropped_width = yuv420_width-left-right;
    const unsigned int cropped_height = yuv420_height-top-buttom;
    /* largest part of the cropped image that consists of whole tiles */
    const unsigned int aligned_x_size = (cropped_width>>6)<<6;
    const unsigned int aligned_y_size = (cropped_height>>5)<<5;
    unsigned int i, j, k;
    unsigned int tiled_offset;
    unsigned int tile_row, tile_col;

    /* pass 1: complete 64x32 tiles */
    for (i=0; i<aligned_y_size; i=i+32) {
        for (j=0; j<aligned_x_size; j=j+64) {
            tiled_offset = csc_interleave_tile_offset(j>>6, i, cropped_width, cropped_height);
            for (k=0; k<32; k++) {
                csc_interleave_memcpy(nv12t_uv_dest+tiled_offset+64*k,
                                        yuv420_u_src+left/2+j/2+yuv420_width/2*(i+top+k),
                                        yuv420_v_src+left/2+j/2+yuv420_width/2*(i+top+k), 32);
            }
        }
    }

    /* pass 2: rows below the last complete tile row, one full row per step */
    for (i=aligned_y_size; i<cropped_height; i=i+1) {
        for (j=0; j<aligned_x_size; j=j+64) {
            tiled_offset = csc_interleave_tile_offset(j>>6, i, cropped_width, cropped_height);
            tile_row = i&0x1F;
            csc_interleave_memcpy(nv12t_uv_dest+tiled_offset+64*(tile_row),
                                    yuv420_u_src+left/2+j/2+yuv420_width/2*(i+top),
                                    yuv420_v_src+left/2+j/2+yuv420_width/2*(i+top), 32);
        }
    }

    /* pass 3: columns right of the last complete tile column, one U/V pair per step */
    for (i=0; i<cropped_height; i=i+1) {
        for (j=aligned_x_size; j<cropped_width; j=j+2) {
            tiled_offset = csc_interleave_tile_offset(j>>6, i, cropped_width, cropped_height);
            tile_row = i&0x1F;
            tile_col = j&0x3F;
            csc_interleave_memcpy(nv12t_uv_dest+tiled_offset+tile_col+64*(tile_row),
                                    yuv420_u_src+left/2+j/2+yuv420_width/2*(i+top),
                                    yuv420_v_src+left/2+j/2+yuv420_width/2*(i+top), 1);
        }
    }
}
1235 #else
/*
 * 2D Configurable tiled memory access (TM)
 * Returns the linear offset of tiled position (xpos, ypos).
 *
 * Tiles are 16 bytes wide. A Y plane tile has 16 rows (256 bytes), a CbCr
 * plane tile has 8 rows (128 bytes). Tiles are numbered row by row, using
 * the width rounded up to a multiple of 16 to get the tiles per row.
 *
 * @param width
 *   Width of the plane[in]
 *
 * @param height
 *   Height of the plane[in]; it does not influence the result
 *
 * @param xpos
 *   Horizontal position inside the plane[in]
 *
 * @param ypos
 *   Vertical position inside the plane[in]
 *
 * @param crFlag
 *   0: Y plane, any other value: CbCr plane[in]
 *
 * @return
 *   (tile number * tile size in bytes) | offset inside the tile
 */
static unsigned int Tile2D_To_Linear(
    unsigned int width,
    unsigned int height,
    unsigned int xpos,
    unsigned int ypos,
    int crFlag)
{
    /* tiles per row, from the width rounded up to a multiple of 16 */
    const unsigned int tile_num_x = (width + 15) / 16;
    const unsigned int tile_x = xpos / 16;
    unsigned int tile_rows;
    unsigned int tile_shift;
    unsigned int tile_addr;
    unsigned int offset;

    /* kept in the signature for the callers; the address does not depend on it */
    (void)height;

    /* crFlag - 0: Y plane, 1: CbCr plane */
    if (crFlag == 0) {
        tile_rows = 16;
        tile_shift = 8;     /* 16x16 = 256 bytes per tile */
    } else {
        tile_rows = 8;
        tile_shift = 7;     /* 16x8 = 128 bytes per tile */
    }

    tile_addr = (ypos / tile_rows) * tile_num_x + tile_x;
    offset = (ypos % tile_rows) * 16 + (xpos & 15);

    return (tile_addr << tile_shift) | offset;
}
1272
/*
 * Converts 2D tiled Y and interleaved CbCr data to three linear planes.
 *
 * The tiled data is read as 32 bit words, 16 bytes (4 words) per tile row,
 * from the integer addresses y_addr / c_addr plus the offset returned by
 * Tile2D_To_Linear. For the chroma plane bits 0-7 and 16-23 of every word
 * are stored to Cb_plane, bits 8-15 and 24-31 to Cr_plane.
 *
 * @param Y_plane
 *   Y plane address of the linear output[out]
 *
 * @param Cb_plane
 *   Cb plane address of the linear output[out]
 *
 * @param Cr_plane
 *   Cr plane address of the linear output[out]
 *
 * @param y_addr
 *   Address of the tiled Y data, passed as an integer[in]
 *
 * @param c_addr
 *   Address of the tiled CbCr data, passed as an integer[in]
 *
 * @param width
 *   Width of the image[in]
 *
 * @param height
 *   Height of the image[in]
 */
static void Tile2D_To_YUV420(unsigned char *Y_plane, unsigned char *Cb_plane, unsigned char *Cr_plane,
                        unsigned int y_addr, unsigned int c_addr, unsigned int width, unsigned int height)
{
    unsigned int tile_x, tile_y, row, word, sample;
    unsigned int words_per_row, pixels, out_index;
    unsigned long tile_base;

    /* luma: 16x16 tiles, tile_y/tile_x are 0, 16, 32, ... */
    for (tile_y = 0; tile_y < height; tile_y += 16) {
        for (tile_x = 0; tile_x < width; tile_x += 16) {
            /* a tile crossing the right edge only holds the words that
             * contain at least one valid pixel */
            if ((tile_x + 16) > width) {
                words_per_row = (width%16) / 4;
                if (width%4) {
                    words_per_row = words_per_row + 1;
                }
            } else {
                words_per_row = 4;
            }
            tile_base = y_addr + Tile2D_To_Linear(width, height, tile_x, tile_y, 0);

            for (row = 0; (row < 16) && ((tile_y + row) < height); row++) {
                for (word = 0; word < words_per_row; word++) {
                    pixels = *((unsigned int *)(tile_base + 16*row + word*4));
                    for (sample = 0; (sample < 4) && (tile_x + word*4 + sample) < width; sample++) {
                        out_index = (tile_y+row)*width + tile_x + word*4 + sample;
                        Y_plane[out_index] = (pixels>>(8*sample))&0xff;
                    }
                }
            }
        }
    }

    /* chroma: 16x8 tiles, every word carries two Cb/Cr pairs */
    for (tile_y = 0; tile_y < height/2; tile_y += 8) {
        for (tile_x = 0; tile_x < width; tile_x += 16) {
            if ((tile_x + 16) > width) {
                words_per_row = (width%16) / 4;
                if (width%4) {
                    words_per_row = words_per_row + 1;
                }
            } else {
                words_per_row = 4;
            }
            tile_base = c_addr + Tile2D_To_Linear(width, height/2, tile_x, tile_y, 1);

            for (row = 0; (row < 8) && ((tile_y+row) < height/2); row++) {
                for (word = 0; word < words_per_row; word++) {
                    pixels = *((unsigned int *)(tile_base + 16*row + word*4));
                    for (sample = 0; (sample < 2) && (tile_x/2 + word*2 + sample) < width/2; sample++) {
                        out_index = (tile_y+row)*width/2 + tile_x/2 + word*2 + sample;
                        Cb_plane[out_index] = (pixels>> (8*2*sample))&0xff;
                        Cr_plane[out_index] = (pixels>>(8*2*sample+8))&0xff;
                    }
                }
            }
        }
    }
}
1316 #endif /* USE_NV12T_128X64 */
1317
/*
 * De-interleaves src to dest1, dest2
 *
 * Even source bytes (0, 2, 4, ...) go to dest1, odd source bytes
 * (1, 3, 5, ...) go to dest2. src_size/2 byte pairs are processed, so a
 * trailing single byte of an odd src_size is not copied.
 *
 * @param dest1
 *   Address of de-interleaved data[out]
 *
 * @param dest2
 *   Address of de-interleaved data[out]
 *
 * @param src
 *   Address of interleaved data[in]
 *
 * @param src_size
 *   Size of interleaved data[in]
 */
void csc_deinterleave_memcpy(
    unsigned char *dest1,
    unsigned char *dest2,
    unsigned char *src,
    unsigned int src_size)
{
    unsigned int pairs_left;

    for (pairs_left = src_size/2; pairs_left != 0; pairs_left--) {
        *dest1++ = *src++;
        *dest2++ = *src++;
    }
}
1345
/*
 * Interleaves src1, src2 to dest
 *
 * dest receives src1[0], src2[0], src1[1], src2[1], ... so 2*src_size bytes
 * are written. With NEON_SUPPORT the work is forwarded to
 * csc_interleave_memcpy_neon.
 *
 * @param dest
 *   Address of interleaved data[out]
 *
 * @param src1
 *   Address of de-interleaved data[in]
 *
 * @param src2
 *   Address of de-interleaved data[in]
 *
 * @param src_size
 *   Size of de-interleaved data[in]
 */
void csc_interleave_memcpy(
    unsigned char *dest,
    unsigned char *src1,
    unsigned char *src2,
    unsigned int src_size)
{
#ifdef NEON_SUPPORT
    csc_interleave_memcpy_neon(dest, src1, src2, src_size);
#else
/* not neon */
    unsigned int bytes_left;

    for (bytes_left = src_size; bytes_left != 0; bytes_left--) {
        *dest++ = *src1++;
        *dest++ = *src2++;
    }
#endif /* NEON_SUPPORT */
}
1378
/*
 * Converts tiled data to linear.
 * 1. y of nv12t to y of yuv420p
 * 2. y of nv12t to y of yuv420s
 *
 * Depending on NEON_SUPPORT and USE_NV12T_128X64 the work is forwarded to
 * csc_tiled_to_linear_crop_neon, csc_tiled_to_linear_y_neon or
 * csc_tiled_to_linear_crop. Otherwise the generic path below is used: the
 * source consists of 16x16 tiles of 256 bytes each, stored row by row with
 * (width rounded up to 16)/16 tiles per tile row. Tiles crossing the right
 * or bottom edge are copied only up to width / height, so nothing is
 * written outside the width*height bytes of y_dst.
 *
 * @param y_dst
 *   y address of yuv420[out]
 *
 * @param y_src
 *   y address of nv12t[in]
 *
 * @param width
 *   real width of yuv420[in]
 *   it should be even
 *
 * @param height
 *   real height of yuv420[in]
 *   it should be even.
 *
 */
void csc_tiled_to_linear_y(
    unsigned char *y_dst,
    unsigned char *y_src,
    unsigned int width,
    unsigned int height)
{
#ifdef NEON_SUPPORT
#ifdef USE_NV12T_128X64
    csc_tiled_to_linear_crop_neon(y_dst, y_src, width, height, 0, 0, 0, 0);
#else
    csc_tiled_to_linear_y_neon(y_dst, y_src, width, height);
#endif /* USE_NV12T_128X64 */

#else
/* not neon */
#ifdef USE_NV12T_128X64
    csc_tiled_to_linear_crop(y_dst, y_src, width, height, 0, 0, 0, 0);
#else
    /* bytes of one row of tiles divided by 16: width rounded up to 16 */
    const unsigned int tiled_width = ((width + 15) >> 4) << 4;
    unsigned int tile_y, tile_x, row;
    unsigned int rows, cols;
    unsigned int src_offset, dst_offset;

    for (tile_y = 0; tile_y < height; tile_y += rows) {
        /* the last tile row may hold fewer than 16 valid rows */
        rows = height - tile_y;
        if (rows > 16) {
            rows = 16;
        }

        for (tile_x = 0; tile_x < width; tile_x += cols) {
            /* the last tile column may hold fewer than 16 valid bytes */
            cols = width - tile_x;
            if (cols > 16) {
                cols = 16;
            }

            src_offset = (tiled_width * tile_y) + (tile_x << 4);
            dst_offset = width * tile_y + tile_x;
            for (row = 0; row < rows; row++) {
                memcpy(y_dst + dst_offset, y_src + src_offset, cols);
                src_offset += 16;       /* next row inside the tile */
                dst_offset += width;    /* next row of the linear image */
            }
        }
    }
#endif /* USE_NV12T_128X64 */
#endif /* NEON_SUPPORT */
}
1482
/*
 * Converts tiled data to linear
 * 1. uv of nv12t to uv of yuv420s
 *
 * With NEON_SUPPORT and/or USE_NV12T_128X64 defined the work is handed to
 * the matching NEON / crop routine. Otherwise the plain C path below is
 * used. It reads 16x8 tiles: one tile is 128 consecutive source bytes
 * (8 rows of 16 bytes), the tiles of one band follow each other, and a new
 * band starts every (width rounded up to 16) * 8 source bytes.
 *
 * @param uv_dst
 *   uv address of yuv420s[out], written with a row stride of width bytes
 *
 * @param uv_src
 *   uv address of nv12t[in]
 *
 * @param width
 *   number of bytes copied per row[in]
 *
 * @param height
 *   number of rows copied[in]
 *
 */
void csc_tiled_to_linear_uv(
    unsigned char *uv_dst,
    unsigned char *uv_src,
    unsigned int width,
    unsigned int height)
{
#ifdef NEON_SUPPORT
#ifdef USE_NV12T_128X64
    csc_tiled_to_linear_crop_neon(uv_dst, uv_src, width, height, 0, 0, 0, 0);
#else
    csc_tiled_to_linear_uv_neon(uv_dst, uv_src, width, height);
#endif /* USE_NV12T_128X64 */

#else
/* not neon */
#ifdef USE_NV12T_128X64
    csc_tiled_to_linear_crop(uv_dst, uv_src, width, height, 0, 0, 0, 0);
#else
    unsigned int tile_rows = height >> 3;   /* complete 8-row bands */
    unsigned int tile_cols = width >> 4;    /* complete 16-byte tiles per band */
    unsigned int tiled_width = ((width + 15) >> 4) << 4;
    unsigned int ty, tx, line;
    unsigned int lines, bytes;
    unsigned int src_offset, dst_offset;

    /* index tile_rows / tile_cols addresses the partial band / tile, if any */
    for (ty = 0; ty <= tile_rows; ty++) {
        lines = (ty < tile_rows) ? 8 : (height & 0x7);
        if (lines == 0) {
            break;
        }
        for (tx = 0; tx <= tile_cols; tx++) {
            bytes = (tx < tile_cols) ? 16 : (width & 0xF);
            if (bytes == 0) {
                break;
            }
            src_offset = (tiled_width * (ty << 3)) + (tx << 7);
            dst_offset = width * (ty << 3) + (tx << 4);
            /* one tile row is 16 source bytes and one full destination row */
            for (line = 0; line < lines; line++) {
                memcpy(uv_dst + dst_offset, uv_src + src_offset, bytes);
                src_offset += 16;
                dst_offset += width;
            }
        }
    }
#endif /* USE_NV12T_128X64 */
#endif /* NEON_SUPPORT */
}
1577
/*
 * Converts tiled data to linear
 * 1. uv of nv12t to u and v of yuv420p
 *
 * With NEON_SUPPORT and/or USE_NV12T_128X64 defined the work is handed to
 * the matching NEON / crop routine. Otherwise the plain C path below walks
 * the source as 16x8 tiles (128 consecutive bytes per tile, 16 bytes per
 * tile row) and passes every tile row to csc_deinterleave_memcpy().
 * Both destination planes advance by width / 2 bytes per row.
 *
 * @param u_dst
 *   u address of yuv420p[out]
 *
 * @param v_dst
 *   v address of yuv420p[out]
 *
 * @param uv_src
 *   uv address of nv12t[in]
 *
 * @param width
 *   number of interleaved source bytes per row[in]
 *
 * @param height
 *   number of source rows[in]
 */
void csc_tiled_to_linear_uv_deinterleave(
    unsigned char *u_dst,
    unsigned char *v_dst,
    unsigned char *uv_src,
    unsigned int width,
    unsigned int height)
{
#ifdef NEON_SUPPORT
#ifdef USE_NV12T_128X64
    csc_tiled_to_linear_deinterleave_crop_neon(u_dst, v_dst, uv_src, width, height, 0, 0, 0, 0);
#else
    csc_tiled_to_linear_uv_deinterleave_neon(u_dst, v_dst, uv_src, width, height);
#endif /* USE_NV12T_128X64 */

#else
/* not neon */
/*
 * The guard used to test USE_NV12_128X64 (missing "T"), which no other
 * function in this file checks, so 128x64 builds fell through to the
 * 16x8 tile code below.
 */
#ifdef USE_NV12T_128X64
    csc_tiled_to_linear_deinterleave_crop(u_dst, v_dst, uv_src, width, height,
                                          0, 0, 0, 0);
#else
    unsigned int tile_rows = height >> 3;   /* complete 8-row bands */
    unsigned int tile_cols = width >> 4;    /* complete 16-byte tiles per band */
    unsigned int tiled_width = ((width + 15) >> 4) << 4;
    unsigned int ty, tx, line;
    unsigned int lines, bytes;
    unsigned int src_offset, dst_offset;

    /* index tile_rows / tile_cols addresses the partial band / tile, if any */
    for (ty = 0; ty <= tile_rows; ty++) {
        lines = (ty < tile_rows) ? 8 : (height & 0x7);
        if (lines == 0) {
            break;
        }
        for (tx = 0; tx <= tile_cols; tx++) {
            bytes = (tx < tile_cols) ? 16 : (width & 0xF);
            if (bytes == 0) {
                break;
            }
            src_offset = (tiled_width * (ty << 3)) + (tx << 7);
            dst_offset = (width >> 1) * (ty << 3) + (tx << 3);
            for (line = 0; line < lines; line++) {
                csc_deinterleave_memcpy(u_dst + dst_offset, v_dst + dst_offset,
                                        uv_src + src_offset, bytes);
                src_offset += 16;
                dst_offset += width >> 1;
            }
        }
    }
#endif /* USE_NV12T_128X64 */
#endif /* NEON_SUPPORT */
}
1681
/*
 * Converts linear data to tiled
 * 1. y of yuv420 to y of nv12t
 *
 * Only builds with USE_NV12T_128X64 perform a conversion (through the NEON
 * or plain C crop routine, called with zero crop). Without that macro the
 * function returns without reading y_src or writing y_dst.
 *
 * @param y_dst
 *   y address of nv12t[out]
 *
 * @param y_src
 *   y address of yuv420[in]
 *
 * @param width
 *   real width of yuv420[in]
 *   it should be even
 *
 * @param height
 *   real height of yuv420[in]
 *   it should be even.
 *
 */
void csc_linear_to_tiled_y(
    unsigned char *y_dst,
    unsigned char *y_src,
    unsigned int width,
    unsigned int height)
{
#ifndef USE_NV12T_128X64
    /* no converter for this tile format: only reference the parameters */
    (void)y_dst;
    (void)y_src;
    (void)width;
    (void)height;
#elif defined(NEON_SUPPORT)
    csc_linear_to_tiled_crop_neon(y_dst, y_src, width, height, 0, 0, 0, 0);
#else
    csc_linear_to_tiled_crop(y_dst, y_src, width, height, 0, 0, 0, 0);
#endif /* USE_NV12T_128X64 */
}
1720
/*
 * Converts and interleaves linear data to tiled
 * 1. u and v of yuv420 to uv of nv12t
 *
 * Only builds with USE_NV12T_128X64 perform a conversion (through the NEON
 * or plain C interleave-crop routine, called with zero crop). Without that
 * macro the function returns without reading the sources or writing uv_dst.
 *
 * @param uv_dst
 *   uv address of nv12t[out]
 *
 * @param u_src
 *   u address of yuv420[in]
 *
 * @param v_src
 *   v address of yuv420[in]
 *
 * @param width
 *   real width of yuv420[in]
 *
 * @param height
 *   real height of yuv420[in]
 *
 */
void csc_linear_to_tiled_uv(
    unsigned char *uv_dst,
    unsigned char *u_src,
    unsigned char *v_src,
    unsigned int width,
    unsigned int height)
{
#ifndef USE_NV12T_128X64
    /* no converter for this tile format: only reference the parameters */
    (void)uv_dst;
    (void)u_src;
    (void)v_src;
    (void)width;
    (void)height;
#elif defined(NEON_SUPPORT)
    csc_linear_to_tiled_interleave_crop_neon(uv_dst, u_src, v_src, width, height, 0, 0, 0, 0);
#else
    csc_linear_to_tiled_interleave_crop(uv_dst, u_src, v_src, width, height, 0, 0, 0, 0);
#endif /* USE_NV12T_128X64 */
}
1762
/*
 * Converts RGB565 to YUV420P
 *
 * Every pixel produces one Y byte. U and V are taken from the pixel at the
 * even row / even column of each 2x2 group (no averaging) and appended to
 * their planes in scan order.
 *
 * @param y_dst
 *   Y plane address of YUV420P[out]
 *
 * @param u_dst
 *   U plane address of YUV420P[out]
 *
 * @param v_dst
 *   V plane address of YUV420P[out]
 *
 * @param rgb_src
 *   Address of RGB565[in], read as 16-bit words
 *
 * @param width
 *   Width of RGB565[in]
 *
 * @param height
 *   Height of RGB565[in]
 */
void csc_RGB565_to_YUV420P(
    unsigned char *y_dst,
    unsigned char *u_dst,
    unsigned char *v_dst,
    unsigned char *rgb_src,
    int width,
    int height)
{
    const unsigned short int *pixel = (const unsigned short int *)rgb_src;
    unsigned char *y_out = y_dst;
    unsigned char *u_out = u_dst;
    unsigned char *v_out = v_dst;
    unsigned int word;
    unsigned int red, green, blue;
    unsigned int luma, cb, cr;
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            word = *pixel++;

            /* widen the 5/6/5-bit fields to 8 bits, low bits left at zero */
            red = ((word >> 11) & 0x1F) << 3;
            green = ((word >> 5) & 0x3F) << 2;
            blue = (word & 0x1F) << 3;

            luma = (((66 * red) + (129 * green) + (25 * blue) + 128) >> 8) + 16;
            *y_out++ = (unsigned char)luma;

            if (((row | col) & 1) != 0)
                continue;

            /*
             * The sums may wrap in unsigned arithmetic; only the low eight
             * bits are stored and those equal the signed result.
             */
            cb = (112 * blue) - (38 * red) - (74 * green) + 128;
            cb = (cb >> 8) + 128;
            cr = (112 * red) - (94 * green) - (18 * blue) + 128;
            cr = (cr >> 8) + 128;

            *u_out++ = (unsigned char)cb;
            *v_out++ = (unsigned char)cr;
        }
    }
}
1840
/*
 * Converts RGB565 to YUV420SP
 *
 * Every pixel produces one Y byte. For the pixel at the even row / even
 * column of each 2x2 group a U byte followed by a V byte is appended to the
 * UV plane (no averaging).
 *
 * @param y_dst
 *   Y plane address of YUV420SP[out]
 *
 * @param uv_dst
 *   UV plane address of YUV420SP[out]
 *
 * @param rgb_src
 *   Address of RGB565[in], read as 16-bit words
 *
 * @param width
 *   Width of RGB565[in]
 *
 * @param height
 *   Height of RGB565[in]
 */
void csc_RGB565_to_YUV420SP(
    unsigned char *y_dst,
    unsigned char *uv_dst,
    unsigned char *rgb_src,
    int width,
    int height)
{
    const unsigned short int *pixel = (const unsigned short int *)rgb_src;
    unsigned char *y_out = y_dst;
    unsigned char *uv_out = uv_dst;
    unsigned int word;
    unsigned int red, green, blue;
    unsigned int luma, cb, cr;
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            word = *pixel++;

            /* widen the 5/6/5-bit fields to 8 bits, low bits left at zero */
            red = (word & 0xF800) >> 8;
            green = (word & 0x07E0) >> 3;
            blue = (word & 0x001F) << 3;

            luma = (((66 * red) + (129 * green) + (25 * blue) + 128) >> 8) + 16;
            *y_out++ = (unsigned char)luma;

            if (((row | col) & 1) != 0)
                continue;

            /*
             * The sums may wrap in unsigned arithmetic; only the low eight
             * bits are stored and those equal the signed result.
             */
            cb = (112 * blue) - (38 * red) - (74 * green) + 128;
            cb = (cb >> 8) + 128;
            cr = (112 * red) - (94 * green) - (18 * blue) + 128;
            cr = (cr >> 8) + 128;

            *uv_out++ = (unsigned char)cb;
            *uv_out++ = (unsigned char)cr;
        }
    }
}
1913
/*
 * Converts BGRA8888 to YUV420P
 *
 * Each source pixel is read as one 32-bit word with red in bits 16-23,
 * green in bits 8-15 and blue in bits 0-7; bits 24-31 are ignored.
 * Every pixel produces one Y byte. U and V are taken from the pixel at the
 * even row / even column of each 2x2 group (no averaging).
 *
 * @param y_dst
 *   Y plane address of YUV420P[out]
 *
 * @param u_dst
 *   U plane address of YUV420P[out]
 *
 * @param v_dst
 *   V plane address of YUV420P[out]
 *
 * @param rgb_src
 *   Address of BGRA8888[in]
 *
 * @param width
 *   Width of BGRA8888[in]
 *
 * @param height
 *   Height of BGRA8888[in]
 */
void csc_BGRA8888_to_YUV420P(
    unsigned char *y_dst,
    unsigned char *u_dst,
    unsigned char *v_dst,
    unsigned char *rgb_src,
    unsigned int width,
    unsigned int height)
{
    const unsigned int *pixel = (const unsigned int *)rgb_src;
    unsigned char *y_out = y_dst;
    unsigned char *u_out = u_dst;
    unsigned char *v_out = v_dst;
    unsigned int word;
    unsigned int red, green, blue;
    unsigned int luma, cb, cr;
    unsigned int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            word = *pixel++;

            red = (word >> 16) & 0xFF;
            green = (word >> 8) & 0xFF;
            blue = word & 0xFF;

            luma = (((66 * red) + (129 * green) + (25 * blue) + 128) >> 8) + 16;
            *y_out++ = (unsigned char)luma;

            if (((row | col) & 1) != 0)
                continue;

            /*
             * The sums may wrap in unsigned arithmetic; only the low eight
             * bits are stored and those equal the signed result.
             */
            cb = (112 * blue) - (38 * red) - (74 * green) + 128;
            cb = (cb >> 8) + 128;
            cr = (112 * red) - (94 * green) - (18 * blue) + 128;
            cr = (cr >> 8) + 128;

            *u_out++ = (unsigned char)cb;
            *v_out++ = (unsigned char)cr;
        }
    }
}
1990
/*
 * Converts RGBA8888 to YUV420P
 *
 * Each source pixel is read as one 32-bit word with blue in bits 16-23,
 * green in bits 8-15 and red in bits 0-7; bits 24-31 are ignored.
 * Every pixel produces one Y byte. U and V are taken from the pixel at the
 * even row / even column of each 2x2 group (no averaging).
 *
 * @param y_dst
 *   Y plane address of YUV420P[out]
 *
 * @param u_dst
 *   U plane address of YUV420P[out]
 *
 * @param v_dst
 *   V plane address of YUV420P[out]
 *
 * @param rgb_src
 *   Address of RGBA8888[in]
 *
 * @param width
 *   Width of RGBA8888[in]
 *
 * @param height
 *   Height of RGBA8888[in]
 */
void csc_RGBA8888_to_YUV420P(
    unsigned char *y_dst,
    unsigned char *u_dst,
    unsigned char *v_dst,
    unsigned char *rgb_src,
    unsigned int width,
    unsigned int height)
{
    const unsigned int *pixel = (const unsigned int *)rgb_src;
    unsigned char *y_out = y_dst;
    unsigned char *u_out = u_dst;
    unsigned char *v_out = v_dst;
    unsigned int word;
    unsigned int red, green, blue;
    unsigned int luma, cb, cr;
    unsigned int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            word = *pixel++;

            blue = (word >> 16) & 0xFF;
            green = (word >> 8) & 0xFF;
            red = word & 0xFF;

            luma = (((66 * red) + (129 * green) + (25 * blue) + 128) >> 8) + 16;
            *y_out++ = (unsigned char)luma;

            if (((row | col) & 1) != 0)
                continue;

            /*
             * The sums may wrap in unsigned arithmetic; only the low eight
             * bits are stored and those equal the signed result.
             */
            cb = (112 * blue) - (38 * red) - (74 * green) + 128;
            cb = (cb >> 8) + 128;
            cr = (112 * red) - (94 * green) - (18 * blue) + 128;
            cr = (cr >> 8) + 128;

            *u_out++ = (unsigned char)cb;
            *v_out++ = (unsigned char)cr;
        }
    }
}
2067
/*
 * Converts BGRA8888 to YUV420SP
 *
 * With NEON_SUPPORT the conversion is done by
 * csc_BGRA8888_to_YUV420SP_NEON(). The plain C path reads each pixel as one
 * 32-bit word with red in bits 16-23, green in bits 8-15 and blue in bits
 * 0-7 (bits 24-31 ignored), writes one Y byte per pixel and, for the pixel
 * at the even row / even column of each 2x2 group, appends a U byte
 * followed by a V byte to the UV plane.
 *
 * @param y_dst
 *   Y plane address of YUV420SP[out]
 *
 * @param uv_dst
 *   UV plane address of YUV420SP[out]
 *
 * @param rgb_src
 *   Address of BGRA8888[in]
 *
 * @param width
 *   Width of BGRA8888[in]
 *
 * @param height
 *   Height of BGRA8888[in]
 */
void csc_BGRA8888_to_YUV420SP(
    unsigned char *y_dst,
    unsigned char *uv_dst,
    unsigned char *rgb_src,
    unsigned int width,
    unsigned int height)
{
#ifdef NEON_SUPPORT
    csc_BGRA8888_to_YUV420SP_NEON(y_dst, uv_dst, rgb_src, width, height);
#else
    const unsigned int *pixel = (const unsigned int *)rgb_src;
    unsigned char *y_out = y_dst;
    unsigned char *uv_out = uv_dst;
    unsigned int word;
    unsigned int red, green, blue;
    unsigned int luma, cb, cr;
    unsigned int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            word = *pixel++;

            red = (word >> 16) & 0xFF;
            green = (word >> 8) & 0xFF;
            blue = word & 0xFF;

            luma = (((66 * red) + (129 * green) + (25 * blue) + 128) >> 8) + 16;
            *y_out++ = (unsigned char)luma;

            if (((row | col) & 1) != 0)
                continue;

            /*
             * The sums may wrap in unsigned arithmetic; only the low eight
             * bits are stored and those equal the signed result.
             */
            cb = (112 * blue) - (38 * red) - (74 * green) + 128;
            cb = (cb >> 8) + 128;
            cr = (112 * red) - (94 * green) - (18 * blue) + 128;
            cr = (cr >> 8) + 128;

            *uv_out++ = (unsigned char)cb;
            *uv_out++ = (unsigned char)cr;
        }
    }
#endif /* NEON_SUPPORT */
}
2141
/*
 * Converts RGBA8888 to YUV420SP
 *
 * With NEON_SUPPORT the conversion is done by
 * csc_RGBA8888_to_YUV420SP_NEON(). The plain C path reads each pixel as one
 * 32-bit word with blue in bits 16-23, green in bits 8-15 and red in bits
 * 0-7 (bits 24-31 ignored), writes one Y byte per pixel and, for the pixel
 * at the even row / even column of each 2x2 group, appends a U byte
 * followed by a V byte to the UV plane.
 *
 * @param y_dst
 *   Y plane address of YUV420SP[out]
 *
 * @param uv_dst
 *   UV plane address of YUV420SP[out]
 *
 * @param rgb_src
 *   Address of RGBA8888[in]
 *
 * @param width
 *   Width of RGBA8888[in]
 *
 * @param height
 *   Height of RGBA8888[in]
 */
void csc_RGBA8888_to_YUV420SP(
    unsigned char *y_dst,
    unsigned char *uv_dst,
    unsigned char *rgb_src,
    unsigned int width,
    unsigned int height)
{
#ifdef NEON_SUPPORT
    csc_RGBA8888_to_YUV420SP_NEON(y_dst, uv_dst, rgb_src, width, height);
#else
    const unsigned int *pixel = (const unsigned int *)rgb_src;
    unsigned char *y_out = y_dst;
    unsigned char *uv_out = uv_dst;
    unsigned int word;
    unsigned int red, green, blue;
    unsigned int luma, cb, cr;
    unsigned int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            word = *pixel++;

            blue = (word >> 16) & 0xFF;
            green = (word >> 8) & 0xFF;
            red = word & 0xFF;

            luma = (((66 * red) + (129 * green) + (25 * blue) + 128) >> 8) + 16;
            *y_out++ = (unsigned char)luma;

            if (((row | col) & 1) != 0)
                continue;

            /*
             * The sums may wrap in unsigned arithmetic; only the low eight
             * bits are stored and those equal the signed result.
             */
            cb = (112 * blue) - (38 * red) - (74 * green) + 128;
            cb = (cb >> 8) + 128;
            cr = (112 * red) - (94 * green) - (18 * blue) + 128;
            cr = (cr >> 8) + 128;

            *uv_out++ = (unsigned char)cb;
            *uv_out++ = (unsigned char)cr;
        }
    }
#endif /* NEON_SUPPORT */
}