Merge "Fixed buffer selection for UV in AltRef filtering"
[profile/ivi/libvpx.git] / y4minput.c
1 /*
2  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  *
10  *  Based on code from the OggTheora software codec source code,
11  *  Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
12  */
13 #include <stdlib.h>
14 #include <string.h>
15 #include "y4minput.h"
16
17 static int y4m_parse_tags(y4m_input *_y4m,char *_tags){
18   int   got_w;
19   int   got_h;
20   int   got_fps;
21   int   got_interlace;
22   int   got_par;
23   int   got_chroma;
24   char *p;
25   char *q;
26   got_w=got_h=got_fps=got_interlace=got_par=got_chroma=0;
27   for(p=_tags;;p=q){
28     /*Skip any leading spaces.*/
29     while(*p==' ')p++;
30     /*If that's all we have, stop.*/
31     if(p[0]=='\0')break;
32     /*Find the end of this tag.*/
33     for(q=p+1;*q!='\0'&&*q!=' ';q++);
34     /*Process the tag.*/
35     switch(p[0]){
36       case 'W':{
37         if(sscanf(p+1,"%d",&_y4m->pic_w)!=1)return -1;
38         got_w=1;
39       }break;
40       case 'H':{
41         if(sscanf(p+1,"%d",&_y4m->pic_h)!=1)return -1;
42         got_h=1;
43       }break;
44       case 'F':{
45         if(sscanf(p+1,"%d:%d",&_y4m->fps_n,&_y4m->fps_d)!=2){
46           return -1;
47         }
48         got_fps=1;
49       }break;
50       case 'I':{
51         _y4m->interlace=p[1];
52         got_interlace=1;
53       }break;
54       case 'A':{
55         if(sscanf(p+1,"%d:%d",&_y4m->par_n,&_y4m->par_d)!=2){
56           return -1;
57         }
58         got_par=1;
59       }break;
60       case 'C':{
61         if(q-p>16)return -1;
62         memcpy(_y4m->chroma_type,p+1,q-p-1);
63         _y4m->chroma_type[q-p-1]='\0';
64         got_chroma=1;
65       }break;
66       /*Ignore unknown tags.*/
67     }
68   }
69   if(!got_w||!got_h||!got_fps)return -1;
70   if(!got_interlace)_y4m->interlace='?';
71   if(!got_par)_y4m->par_n=_y4m->par_d=0;
72   /*Chroma-type is not specified in older files, e.g., those generated by
73      mplayer.*/
74   if(!got_chroma)strcpy(_y4m->chroma_type,"420");
75   return 0;
76 }
77
78
79
80 /*All anti-aliasing filters in the following conversion functions are based on
81    one of two window functions:
82   The 6-tap Lanczos window (for down-sampling and shifts):
83    sinc(\pi*t)*sinc(\pi*t/3), |t|<3  (sinc(t)==sin(t)/t)
84    0,                         |t|>=3
85   The 4-tap Mitchell window (for up-sampling):
86    7|t|^3-12|t|^2+16/3,             |t|<1
   -(7/3)|t|^3+12|t|^2-20|t|+32/3,  |t|<2
88    0,                               |t|>=2
89   The number of taps is intentionally kept small to reduce computational
90    overhead and limit ringing.
91
92   The taps from these filters are scaled so that their sum is 1, and the result
93    is scaled by 128 and rounded to integers to create a filter whose
94    intermediate values fit inside 16 bits.
95   Coefficients are rounded in such a way as to ensure their sum is still 128,
96    which is usually equivalent to normal rounding.
97
98   Conversions which require both horizontal and vertical filtering could
99    have these steps pipelined, for less memory consumption and better cache
100    performance, but we do them separately for simplicity.*/
101
/*Minimum, maximum and clamp helpers.
  Arguments may be evaluated more than once, so they must be free of side
   effects.*/
#define OC_MINI(_a,_b)      ((_b)<(_a)?(_b):(_a))
#define OC_MAXI(_a,_b)      ((_b)>(_a)?(_b):(_a))
/*Limits _b to at most _c and then to at least _a.*/
#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
105
106 /*420jpeg chroma samples are sited like:
107   Y-------Y-------Y-------Y-------
108   |       |       |       |
109   |   BR  |       |   BR  |
110   |       |       |       |
111   Y-------Y-------Y-------Y-------
112   |       |       |       |
113   |       |       |       |
114   |       |       |       |
115   Y-------Y-------Y-------Y-------
116   |       |       |       |
117   |   BR  |       |   BR  |
118   |       |       |       |
119   Y-------Y-------Y-------Y-------
120   |       |       |       |
121   |       |       |       |
122   |       |       |       |
123
124   420mpeg2 chroma samples are sited like:
125   Y-------Y-------Y-------Y-------
126   |       |       |       |
127   BR      |       BR      |
128   |       |       |       |
129   Y-------Y-------Y-------Y-------
130   |       |       |       |
131   |       |       |       |
132   |       |       |       |
133   Y-------Y-------Y-------Y-------
134   |       |       |       |
135   BR      |       BR      |
136   |       |       |       |
137   Y-------Y-------Y-------Y-------
138   |       |       |       |
139   |       |       |       |
140   |       |       |       |
141
142   We use a resampling filter to shift the site locations one quarter pixel (at
143    the chroma plane's resolution) to the right.
144   The 4:2:2 modes look exactly the same, except there are twice as many chroma
145    lines, and they are vertically co-sited with the luma samples in both the
146    mpeg2 and jpeg cases (thus requiring no vertical resampling).*/
/*Limits a filtered value to the 8-bit sample range [0,255].*/
static unsigned char y4m_42x_hshift_clamp(int _v){
  if(_v<0)return 0;
  if(_v>255)return 255;
  return (unsigned char)_v;
}

/*Horizontally filters one plane, row by row, with the 6-tap filter
   [4 -17 114 35 -9 1]/128.
  Taps that fall outside a row reuse that row's nearest edge sample.
  _dst: Output plane, _c_w*_c_h bytes.
  _src: Input plane, _c_w*_c_h bytes.
  _c_w: Width of the plane in samples.
  _c_h: Height of the plane in rows.*/
static void y4m_42xmpeg2_42xjpeg_helper(unsigned char *_dst,
 const unsigned char *_src,int _c_w,int _c_h){
  int last;
  int head;
  int row;
  int col;
  int sum;
  last=_c_w-1;
  head=_c_w<2?_c_w:2;
  for(row=0;row<_c_h;row++){
    /*Left edge: the two leading taps may fall off the row, and on narrow
       rows so may the trailing ones.*/
    for(col=0;col<head;col++){
      sum=4*_src[0]-17*_src[col>0?col-1:0]+114*_src[col]
       +35*_src[col+1<last?col+1:last]-9*_src[col+2<last?col+2:last]
       +_src[col+3<last?col+3:last]+64;
      _dst[col]=y4m_42x_hshift_clamp(sum>>7);
    }
    /*Interior: every tap lies inside the row.*/
    while(col<_c_w-3){
      sum=4*_src[col-2]-17*_src[col-1]+114*_src[col]+35*_src[col+1]
       -9*_src[col+2]+_src[col+3]+64;
      _dst[col]=y4m_42x_hshift_clamp(sum>>7);
      col++;
    }
    /*Right edge: the last tap always lands on the final sample.*/
    while(col<_c_w){
      sum=4*_src[col-2]-17*_src[col-1]+114*_src[col]
       +35*_src[col+1<last?col+1:last]-9*_src[col+2<last?col+2:last]
       +_src[last]+64;
      _dst[col]=y4m_42x_hshift_clamp(sum>>7);
      col++;
    }
    _dst+=_c_w;
    _src+=_c_w;
  }
}
173
174 /*Handles both 422 and 420mpeg2 to 422jpeg and 420jpeg, respectively.*/
175 static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m,unsigned char *_dst,
176  unsigned char *_aux){
177   int c_w;
178   int c_h;
179   int c_sz;
180   int pli;
181   int y;
182   int x;
183   /*Skip past the luma data.*/
184   _dst+=_y4m->pic_w*_y4m->pic_h;
185   /*Compute the size of each chroma plane.*/
186   c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
187   c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
188   c_sz=c_w*c_h;
189   for(pli=1;pli<3;pli++){
190     y4m_42xmpeg2_42xjpeg_helper(_dst,_aux,c_w,c_h);
191     _dst+=c_sz;
192     _aux+=c_sz;
193   }
194 }
195
196 /*This format is only used for interlaced content, but is included for
197    completeness.
198
199   420jpeg chroma samples are sited like:
200   Y-------Y-------Y-------Y-------
201   |       |       |       |
202   |   BR  |       |   BR  |
203   |       |       |       |
204   Y-------Y-------Y-------Y-------
205   |       |       |       |
206   |       |       |       |
207   |       |       |       |
208   Y-------Y-------Y-------Y-------
209   |       |       |       |
210   |   BR  |       |   BR  |
211   |       |       |       |
212   Y-------Y-------Y-------Y-------
213   |       |       |       |
214   |       |       |       |
215   |       |       |       |
216
217   420paldv chroma samples are sited like:
218   YR------Y-------YR------Y-------
219   |       |       |       |
220   |       |       |       |
221   |       |       |       |
222   YB------Y-------YB------Y-------
223   |       |       |       |
224   |       |       |       |
225   |       |       |       |
226   YR------Y-------YR------Y-------
227   |       |       |       |
228   |       |       |       |
229   |       |       |       |
230   YB------Y-------YB------Y-------
231   |       |       |       |
232   |       |       |       |
233   |       |       |       |
234
235   We use a resampling filter to shift the site locations one quarter pixel (at
236    the chroma plane's resolution) to the right.
237   Then we use another filter to move the C_r location down one quarter pixel,
238    and the C_b location up one quarter pixel.*/
239 static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m,unsigned char *_dst,
240  unsigned char *_aux){
241   unsigned char *tmp;
242   int            c_w;
243   int            c_h;
244   int            c_sz;
245   int            pli;
246   int            y;
247   int            x;
248   /*Skip past the luma data.*/
249   _dst+=_y4m->pic_w*_y4m->pic_h;
250   /*Compute the size of each chroma plane.*/
251   c_w=(_y4m->pic_w+1)/2;
252   c_h=(_y4m->pic_h+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
253   c_sz=c_w*c_h;
254   tmp=_aux+2*c_sz;
255   for(pli=1;pli<3;pli++){
256     /*First do the horizontal re-sampling.
257       This is the same as the mpeg2 case, except that after the horizontal
258        case, we need to apply a second vertical filter.*/
259     y4m_42xmpeg2_42xjpeg_helper(tmp,_aux,c_w,c_h);
260     _aux+=c_sz;
261     switch(pli){
262       case 1:{
263         /*Slide C_b up a quarter-pel.
264           This is the same filter used above, but in the other order.*/
265         for(x=0;x<c_w;x++){
266           for(y=0;y<OC_MINI(c_h,3);y++){
267             _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(tmp[0]
268              -9*tmp[OC_MAXI(y-2,0)*c_w]+35*tmp[OC_MAXI(y-1,0)*c_w]
269              +114*tmp[y*c_w]-17*tmp[OC_MINI(y+1,c_h-1)*c_w]
270              +4*tmp[OC_MINI(y+2,c_h-1)*c_w]+64)>>7,255);
271           }
272           for(;y<c_h-2;y++){
273             _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(tmp[(y-3)*c_w]
274              -9*tmp[(y-2)*c_w]+35*tmp[(y-1)*c_w]+114*tmp[y*c_w]
275              -17*tmp[(y+1)*c_w]+4*tmp[(y+2)*c_w]+64)>>7,255);
276           }
277           for(;y<c_h;y++){
278             _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(tmp[(y-3)*c_w]
279              -9*tmp[(y-2)*c_w]+35*tmp[(y-1)*c_w]+114*tmp[y*c_w]
280              -17*tmp[OC_MINI(y+1,c_h-1)*c_w]+4*tmp[(c_h-1)*c_w]+64)>>7,255);
281           }
282           _dst++;
283           tmp++;
284         }
285         _dst+=c_sz-c_w;
286         tmp-=c_w;
287       }break;
288       case 2:{
289         /*Slide C_r down a quarter-pel.
290           This is the same as the horizontal filter.*/
291         for(x=0;x<c_w;x++){
292           for(y=0;y<OC_MINI(c_h,2);y++){
293             _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(4*tmp[0]
294              -17*tmp[OC_MAXI(y-1,0)*c_w]+114*tmp[y*c_w]
295              +35*tmp[OC_MINI(y+1,c_h-1)*c_w]-9*tmp[OC_MINI(y+2,c_h-1)*c_w]
296              +tmp[OC_MINI(y+3,c_h-1)*c_w]+64)>>7,255);
297           }
298           for(;y<c_h-3;y++){
299             _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(4*tmp[(y-2)*c_w]
300              -17*tmp[(y-1)*c_w]+114*tmp[y*c_w]+35*tmp[(y+1)*c_w]
301              -9*tmp[(y+2)*c_w]+tmp[(y+3)*c_w]+64)>>7,255);
302           }
303           for(;y<c_h;y++){
304             _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(4*tmp[(y-2)*c_w]
305              -17*tmp[(y-1)*c_w]+114*tmp[y*c_w]+35*tmp[OC_MINI(y+1,c_h-1)*c_w]
306              -9*tmp[OC_MINI(y+2,c_h-1)*c_w]+tmp[(c_h-1)*c_w]+64)>>7,255);
307           }
308           _dst++;
309           tmp++;
310         }
311       }break;
312     }
313     /*For actual interlaced material, this would have to be done separately on
314        each field, and the shift amounts would be different.
315       C_r moves down 1/8, C_b up 3/8 in the top field, and C_r moves down 3/8,
316        C_b up 1/8 in the bottom field.
317       The corresponding filters would be:
318        Down 1/8 (reverse order for up): [3 -11 125 15 -4 0]/128
319        Down 3/8 (reverse order for up): [4 -19 98 56 -13 2]/128*/
320   }
321 }
322
/*Perform vertical filtering to reduce a single plane from 4:2:2 to 4:2:0.
  This is used as a helper by several conversion routines.*/
/*Limits a filtered value to the 8-bit sample range [0,255].*/
static int y4m_420_vdecim_clamp(int _v){
  return _v<0?0:_v>255?255:_v;
}

/*Halves the height of one plane, column by column, with the 6-tap filter
   [3 -17 78 78 -17 3]/128.
  Taps that fall above or below the plane reuse the nearest edge row.
  _dst: Output plane; one row of _c_w bytes per pair of source rows.
  _src: Input plane, _c_w*_c_h bytes.
  _c_w: Width of both planes in samples.
  _c_h: Height of the input plane in rows.*/
static void y4m_422jpeg_420jpeg_helper(unsigned char *_dst,
 const unsigned char *_src,int _c_w,int _c_h){
  const unsigned char *in;
  unsigned char       *out;
  int                  bottom;
  int                  col;
  int                  row;
  bottom=_c_h-1;
  for(col=0;col<_c_w;col++){
    in=_src+col;
    out=_dst+col;
    row=0;
    /*First output row: the three leading taps all land on source row 0.*/
    if(_c_h>0){
      out[0]=y4m_420_vdecim_clamp((64*in[0]
       +78*in[(1<bottom?1:bottom)*_c_w]
       -17*in[(2<bottom?2:bottom)*_c_w]
       +3*in[(3<bottom?3:bottom)*_c_w]+64)>>7);
      row=2;
    }
    /*Interior: every tap lies inside the plane.*/
    while(row<_c_h-3){
      out[(row>>1)*_c_w]=y4m_420_vdecim_clamp(
       (3*(in[(row-2)*_c_w]+in[(row+3)*_c_w])
       -17*(in[(row-1)*_c_w]+in[(row+2)*_c_w])
       +78*(in[row*_c_w]+in[(row+1)*_c_w])+64)>>7);
      row+=2;
    }
    /*Bottom edge: the last tap always lands on the final source row.*/
    while(row<_c_h){
      out[(row>>1)*_c_w]=y4m_420_vdecim_clamp(
       (3*(in[(row-2)*_c_w]+in[bottom*_c_w])
       -17*(in[(row-1)*_c_w]+in[(row+2<bottom?row+2:bottom)*_c_w])
       +78*(in[row*_c_w]+in[(row+1<bottom?row+1:bottom)*_c_w])+64)>>7);
      row+=2;
    }
  }
}
352
353 /*420jpeg chroma samples are sited like:
354   Y-------Y-------Y-------Y-------
355   |       |       |       |
356   |   BR  |       |   BR  |
357   |       |       |       |
358   Y-------Y-------Y-------Y-------
359   |       |       |       |
360   |       |       |       |
361   |       |       |       |
362   Y-------Y-------Y-------Y-------
363   |       |       |       |
364   |   BR  |       |   BR  |
365   |       |       |       |
366   Y-------Y-------Y-------Y-------
367   |       |       |       |
368   |       |       |       |
369   |       |       |       |
370
371   422jpeg chroma samples are sited like:
372   Y---BR--Y-------Y---BR--Y-------
373   |       |       |       |
374   |       |       |       |
375   |       |       |       |
376   Y---BR--Y-------Y---BR--Y-------
377   |       |       |       |
378   |       |       |       |
379   |       |       |       |
380   Y---BR--Y-------Y---BR--Y-------
381   |       |       |       |
382   |       |       |       |
383   |       |       |       |
384   Y---BR--Y-------Y---BR--Y-------
385   |       |       |       |
386   |       |       |       |
387   |       |       |       |
388
389   We use a resampling filter to decimate the chroma planes by two in the
390    vertical direction.*/
391 static void y4m_convert_422jpeg_420jpeg(y4m_input *_y4m,unsigned char *_dst,
392  unsigned char *_aux){
393   int c_w;
394   int c_h;
395   int c_sz;
396   int dst_c_w;
397   int dst_c_h;
398   int dst_c_sz;
399   int tmp_sz;
400   int pic_sz;
401   int pli;
402   /*Skip past the luma data.*/
403   _dst+=_y4m->pic_w*_y4m->pic_h;
404   /*Compute the size of each chroma plane.*/
405   c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
406   c_h=_y4m->pic_h;
407   dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
408   dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
409   c_sz=c_w*c_h;
410   dst_c_sz=dst_c_w*dst_c_h;
411   for(pli=1;pli<3;pli++){
412     y4m_422jpeg_420jpeg_helper(_dst,_aux,c_w,c_h);
413     _aux+=c_sz;
414     _dst+=dst_c_sz;
415   }
416 }
417
418 /*420jpeg chroma samples are sited like:
419   Y-------Y-------Y-------Y-------
420   |       |       |       |
421   |   BR  |       |   BR  |
422   |       |       |       |
423   Y-------Y-------Y-------Y-------
424   |       |       |       |
425   |       |       |       |
426   |       |       |       |
427   Y-------Y-------Y-------Y-------
428   |       |       |       |
429   |   BR  |       |   BR  |
430   |       |       |       |
431   Y-------Y-------Y-------Y-------
432   |       |       |       |
433   |       |       |       |
434   |       |       |       |
435
436   422 chroma samples are sited like:
437   YBR-----Y-------YBR-----Y-------
438   |       |       |       |
439   |       |       |       |
440   |       |       |       |
441   YBR-----Y-------YBR-----Y-------
442   |       |       |       |
443   |       |       |       |
444   |       |       |       |
445   YBR-----Y-------YBR-----Y-------
446   |       |       |       |
447   |       |       |       |
448   |       |       |       |
449   YBR-----Y-------YBR-----Y-------
450   |       |       |       |
451   |       |       |       |
452   |       |       |       |
453
454   We use a resampling filter to shift the original site locations one quarter
455    pixel (at the original chroma resolution) to the right.
456   Then we use a second resampling filter to decimate the chroma planes by two
457    in the vertical direction.*/
458 static void y4m_convert_422_420jpeg(y4m_input *_y4m,unsigned char *_dst,
459  unsigned char *_aux){
460   unsigned char *tmp;
461   int            c_w;
462   int            c_h;
463   int            c_sz;
464   int            dst_c_w;
465   int            dst_c_h;
466   int            dst_c_sz;
467   int            pli;
468   int            y;
469   int            x;
470   /*Skip past the luma data.*/
471   _dst+=_y4m->pic_w*_y4m->pic_h;
472   /*Compute the size of each chroma plane.*/
473   c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
474   c_h=_y4m->pic_h;
475   dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
476   c_sz=c_w*c_h;
477   dst_c_sz=c_w*dst_c_h;
478   tmp=_aux+2*c_sz;
479   for(pli=1;pli<3;pli++){
480     /*In reality, the horizontal and vertical steps could be pipelined, for
481        less memory consumption and better cache performance, but we do them
482        separately for simplicity.*/
483     /*First do horizontal filtering (convert to 422jpeg)*/
484     y4m_42xmpeg2_42xjpeg_helper(tmp,_aux,c_w,c_h);
485     /*Now do the vertical filtering.*/
486     y4m_422jpeg_420jpeg_helper(_dst,tmp,c_w,c_h);
487     _aux+=c_sz;
488     _dst+=dst_c_sz;
489   }
490 }
491
492 /*420jpeg chroma samples are sited like:
493   Y-------Y-------Y-------Y-------
494   |       |       |       |
495   |   BR  |       |   BR  |
496   |       |       |       |
497   Y-------Y-------Y-------Y-------
498   |       |       |       |
499   |       |       |       |
500   |       |       |       |
501   Y-------Y-------Y-------Y-------
502   |       |       |       |
503   |   BR  |       |   BR  |
504   |       |       |       |
505   Y-------Y-------Y-------Y-------
506   |       |       |       |
507   |       |       |       |
508   |       |       |       |
509
510   411 chroma samples are sited like:
511   YBR-----Y-------Y-------Y-------
512   |       |       |       |
513   |       |       |       |
514   |       |       |       |
515   YBR-----Y-------Y-------Y-------
516   |       |       |       |
517   |       |       |       |
518   |       |       |       |
519   YBR-----Y-------Y-------Y-------
520   |       |       |       |
521   |       |       |       |
522   |       |       |       |
523   YBR-----Y-------Y-------Y-------
524   |       |       |       |
525   |       |       |       |
526   |       |       |       |
527
528   We use a filter to resample at site locations one eighth pixel (at the source
529    chroma plane's horizontal resolution) and five eighths of a pixel to the
530    right.
531   Then we use another filter to decimate the planes by 2 in the vertical
532    direction.*/
533 static void y4m_convert_411_420jpeg(y4m_input *_y4m,unsigned char *_dst,
534  unsigned char *_aux){
535   unsigned char *tmp;
536   int            c_w;
537   int            c_h;
538   int            c_sz;
539   int            dst_c_w;
540   int            dst_c_h;
541   int            dst_c_sz;
542   int            tmp_sz;
543   int            pli;
544   int            y;
545   int            x;
546   /*Skip past the luma data.*/
547   _dst+=_y4m->pic_w*_y4m->pic_h;
548   /*Compute the size of each chroma plane.*/
549   c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
550   c_h=_y4m->pic_h;
551   dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
552   dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
553   c_sz=c_w*c_h;
554   dst_c_sz=dst_c_w*dst_c_h;
555   tmp_sz=dst_c_w*c_h;
556   tmp=_aux+2*c_sz;
557   for(pli=1;pli<3;pli++){
558     /*In reality, the horizontal and vertical steps could be pipelined, for
559        less memory consumption and better cache performance, but we do them
560        separately for simplicity.*/
561     /*First do horizontal filtering (convert to 422jpeg)*/
562     for(y=0;y<c_h;y++){
563       /*Filters: [1 110 18 -1]/128 and [-3 50 86 -5]/128, both derived from a
564          4-tap Mitchell window.*/
565       for(x=0;x<OC_MINI(c_w,1);x++){
566         tmp[x<<1]=(unsigned char)OC_CLAMPI(0,(111*_aux[0]
567          +18*_aux[OC_MINI(1,c_w-1)]-_aux[OC_MINI(2,c_w-1)]+64)>>7,255);
568         tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(47*_aux[0]
569          +86*_aux[OC_MINI(1,c_w-1)]-5*_aux[OC_MINI(2,c_w-1)]+64)>>7,255);
570       }
571       for(;x<c_w-2;x++){
572         tmp[x<<1]=(unsigned char)OC_CLAMPI(0,(_aux[x-1]+110*_aux[x]
573          +18*_aux[x+1]-_aux[x+2]+64)>>7,255);
574         tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(-3*_aux[x-1]+50*_aux[x]
575          +86*_aux[x+1]-5*_aux[x+2]+64)>>7,255);
576       }
577       for(;x<c_w;x++){
578         tmp[x<<1]=(unsigned char)OC_CLAMPI(0,(_aux[x-1]+110*_aux[x]
579          +18*_aux[OC_MINI(x+1,c_w-1)]-_aux[c_w-1]+64)>>7,255);
580         if((x<<1|1)<dst_c_w){
581           tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(-3*_aux[x-1]+50*_aux[x]
582            +86*_aux[OC_MINI(x+1,c_w-1)]-5*_aux[c_w-1]+64)>>7,255);
583         }
584       }
585       tmp+=dst_c_w;
586       _aux+=c_w;
587     }
588     tmp-=tmp_sz;
589     /*Now do the vertical filtering.*/
590     y4m_422jpeg_420jpeg_helper(_dst,tmp,dst_c_w,c_h);
591     _dst+=dst_c_sz;
592   }
593 }
594
595 /*Convert 444 to 420jpeg.*/
596 static void y4m_convert_444_420jpeg(y4m_input *_y4m,unsigned char *_dst,
597  unsigned char *_aux){
598   unsigned char *tmp;
599   int            c_w;
600   int            c_h;
601   int            c_sz;
602   int            dst_c_w;
603   int            dst_c_h;
604   int            dst_c_sz;
605   int            tmp_sz;
606   int            pli;
607   int            y;
608   int            x;
609   /*Skip past the luma data.*/
610   _dst+=_y4m->pic_w*_y4m->pic_h;
611   /*Compute the size of each chroma plane.*/
612   c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
613   c_h=_y4m->pic_h;
614   dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
615   dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
616   c_sz=c_w*c_h;
617   dst_c_sz=dst_c_w*dst_c_h;
618   tmp_sz=dst_c_w*c_h;
619   tmp=_aux+2*c_sz;
620   for(pli=1;pli<3;pli++){
621     /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
622     for(y=0;y<c_h;y++){
623       for(x=0;x<OC_MINI(c_w,2);x+=2){
624         tmp[x>>1]=OC_CLAMPI(0,(64*_aux[0]+78*_aux[OC_MINI(1,c_w-1)]
625          -17*_aux[OC_MINI(2,c_w-1)]
626          +3*_aux[OC_MINI(3,c_w-1)]+64)>>7,255);
627       }
628       for(;x<c_w-3;x+=2){
629         tmp[x>>1]=OC_CLAMPI(0,(3*(_aux[x-2]+_aux[x+3])
630          -17*(_aux[x-1]+_aux[x+2])+78*(_aux[x]+_aux[x+1])+64)>>7,255);
631       }
632       for(;x<c_w;x+=2){
633         tmp[x>>1]=OC_CLAMPI(0,(3*(_aux[x-2]+_aux[c_w-1])-
634          17*(_aux[x-1]+_aux[OC_MINI(x+2,c_w-1)])+
635          78*(_aux[x]+_aux[OC_MINI(x+1,c_w-1)])+64)>>7,255);
636       }
637       tmp+=dst_c_w;
638       _aux+=c_w;
639     }
640     tmp-=tmp_sz;
641     /*Now do the vertical filtering.*/
642     y4m_422jpeg_420jpeg_helper(_dst,tmp,dst_c_w,c_h);
643     _dst+=dst_c_sz;
644   }
645 }
646
647 /*The image is padded with empty chroma components at 4:2:0.*/
648 static void y4m_convert_mono_420jpeg(y4m_input *_y4m,unsigned char *_dst,
649  unsigned char *_aux){
650   int c_sz;
651   _dst+=_y4m->pic_w*_y4m->pic_h;
652   c_sz=((_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h)*
653    ((_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v);
654   memset(_dst,128,c_sz*2);
655 }
656
657 /*No conversion function needed.*/
658 static void y4m_convert_null(y4m_input *_y4m,unsigned char *_dst,
659  unsigned char *_aux){
660 }
661
662 int y4m_input_open(y4m_input *_y4m,FILE *_fin,char *_skip,int _nskip){
663   char buffer[80];
664   int  ret;
665   int  i;
666   /*Read until newline, or 80 cols, whichever happens first.*/
667   for(i=0;i<79;i++){
668     if(_nskip>0){
669       buffer[i]=*_skip++;
670       _nskip--;
671     }
672     else{
673       ret=fread(buffer+i,1,1,_fin);
674       if(ret<1)return -1;
675     }
676     if(buffer[i]=='\n')break;
677   }
678   /*We skipped too much header data.*/
679   if(_nskip>0)return -1;
680   if(i==79){
681     fprintf(stderr,"Error parsing header; not a YUV2MPEG2 file?\n");
682     return -1;
683   }
684   buffer[i]='\0';
685   if(memcmp(buffer,"YUV4MPEG",8)){
686     fprintf(stderr,"Incomplete magic for YUV4MPEG file.\n");
687     return -1;
688   }
689   if(buffer[8]!='2'){
690     fprintf(stderr,"Incorrect YUV input file version; YUV4MPEG2 required.\n");
691   }
692   ret=y4m_parse_tags(_y4m,buffer+5);
693   if(ret<0){
694     fprintf(stderr,"Error parsing YUV4MPEG2 header.\n");
695     return ret;
696   }
697   if(_y4m->interlace=='?'){
698     fprintf(stderr,"Warning: Input video interlacing format unknown; "
699      "assuming progressive scan.\n");
700   }
701   else if(_y4m->interlace!='p'){
702     fprintf(stderr,"Input video is interlaced; "
703      "Only progressive scan handled.\n");
704     return -1;
705   }
706   if(strcmp(_y4m->chroma_type,"420")==0||
707    strcmp(_y4m->chroma_type,"420jpeg")==0){
708     _y4m->src_c_dec_h=_y4m->dst_c_dec_h=_y4m->src_c_dec_v=_y4m->dst_c_dec_v=2;
709     _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h
710      +2*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
711     /*Natively supported: no conversion required.*/
712     _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=0;
713     _y4m->convert=y4m_convert_null;
714   }
715   else if(strcmp(_y4m->chroma_type,"420mpeg2")==0){
716     _y4m->src_c_dec_h=_y4m->dst_c_dec_h=_y4m->src_c_dec_v=_y4m->dst_c_dec_v=2;
717     _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
718     /*Chroma filter required: read into the aux buf first.*/
719     _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=
720      2*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
721     _y4m->convert=y4m_convert_42xmpeg2_42xjpeg;
722   }
723   else if(strcmp(_y4m->chroma_type,"420paldv")==0){
724     _y4m->src_c_dec_h=_y4m->dst_c_dec_h=_y4m->src_c_dec_v=_y4m->dst_c_dec_v=2;
725     _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
726     /*Chroma filter required: read into the aux buf first.
727       We need to make two filter passes, so we need some extra space in the
728        aux buffer.*/
729     _y4m->aux_buf_sz=3*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
730     _y4m->aux_buf_read_sz=2*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
731     _y4m->convert=y4m_convert_42xpaldv_42xjpeg;
732   }
733   else if(strcmp(_y4m->chroma_type,"422jpeg")==0){
734     _y4m->src_c_dec_h=_y4m->dst_c_dec_h=2;
735     _y4m->src_c_dec_v=1;
736     _y4m->dst_c_dec_v=2;
737     _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
738     /*Chroma filter required: read into the aux buf first.*/
739     _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=2*((_y4m->pic_w+1)/2)*_y4m->pic_h;
740     _y4m->convert=y4m_convert_422jpeg_420jpeg;
741   }
742   else if(strcmp(_y4m->chroma_type,"422")==0){
743     _y4m->src_c_dec_h=_y4m->dst_c_dec_h=2;
744     _y4m->src_c_dec_v=1;
745     _y4m->dst_c_dec_v=2;
746     _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
747     /*Chroma filter required: read into the aux buf first.
748       We need to make two filter passes, so we need some extra space in the
749        aux buffer.*/
750     _y4m->aux_buf_read_sz=2*((_y4m->pic_w+1)/2)*_y4m->pic_h;
751     _y4m->aux_buf_sz=_y4m->aux_buf_read_sz+((_y4m->pic_w+1)/2)*_y4m->pic_h;
752     _y4m->convert=y4m_convert_422_420jpeg;
753   }
754   else if(strcmp(_y4m->chroma_type,"411")==0){
755     _y4m->src_c_dec_h=4;
756     _y4m->dst_c_dec_h=2;
757     _y4m->src_c_dec_v=1;
758     _y4m->dst_c_dec_v=2;
759     _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
760     /*Chroma filter required: read into the aux buf first.
761       We need to make two filter passes, so we need some extra space in the
762        aux buffer.*/
763     _y4m->aux_buf_read_sz=2*((_y4m->pic_w+3)/4)*_y4m->pic_h;
764     _y4m->aux_buf_sz=_y4m->aux_buf_read_sz+((_y4m->pic_w+1)/2)*_y4m->pic_h;
765     _y4m->convert=y4m_convert_411_420jpeg;
766   }
767   else if(strcmp(_y4m->chroma_type,"444")==0){
768     _y4m->src_c_dec_h=1;
769     _y4m->dst_c_dec_h=2;
770     _y4m->src_c_dec_v=1;
771     _y4m->dst_c_dec_v=2;
772     _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
773     /*Chroma filter required: read into the aux buf first.
774       We need to make two filter passes, so we need some extra space in the
775        aux buffer.*/
776     _y4m->aux_buf_read_sz=2*_y4m->pic_w*_y4m->pic_h;
777     _y4m->aux_buf_sz=_y4m->aux_buf_read_sz+((_y4m->pic_w+1)/2)*_y4m->pic_h;
778     _y4m->convert=y4m_convert_444_420jpeg;
779   }
780   else if(strcmp(_y4m->chroma_type,"444alpha")==0){
781     _y4m->src_c_dec_h=1;
782     _y4m->dst_c_dec_h=2;
783     _y4m->src_c_dec_v=1;
784     _y4m->dst_c_dec_v=2;
785     _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
786     /*Chroma filter required: read into the aux buf first.
787       We need to make two filter passes, so we need some extra space in the
788        aux buffer.
789       The extra plane also gets read into the aux buf.
790       It will be discarded.*/
791     _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=3*_y4m->pic_w*_y4m->pic_h;
792     _y4m->convert=y4m_convert_444_420jpeg;
793   }
794   else if(strcmp(_y4m->chroma_type,"mono")==0){
795     _y4m->src_c_dec_h=_y4m->src_c_dec_v=0;
796     _y4m->dst_c_dec_h=_y4m->dst_c_dec_v=2;
797     _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
798     /*No extra space required, but we need to clear the chroma planes.*/
799     _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=0;
800     _y4m->convert=y4m_convert_mono_420jpeg;
801   }
802   else{
803     fprintf(stderr,"Unknown chroma sampling type: %s\n",_y4m->chroma_type);
804     return -1;
805   }
806   /*The size of the final frame buffers is always computed from the
807      destination chroma decimation type.*/
808   _y4m->dst_buf_sz=_y4m->pic_w*_y4m->pic_h
809    +2*((_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h)*
810    ((_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v);
811   _y4m->dst_buf=(unsigned char *)malloc(_y4m->dst_buf_sz);
812   _y4m->aux_buf=(unsigned char *)malloc(_y4m->aux_buf_sz);
813   return 0;
814 }
815
816 void y4m_input_close(y4m_input *_y4m){
817   free(_y4m->dst_buf);
818   free(_y4m->aux_buf);
819 }
820
821 int y4m_input_fetch_frame(y4m_input *_y4m,FILE *_fin,vpx_image_t *_img){
822   char frame[6];
823   int  pic_sz;
824   int  frame_c_w;
825   int  frame_c_h;
826   int  c_w;
827   int  c_h;
828   int  c_sz;
829   int  ret;
830   /*Read and skip the frame header.*/
831   ret=fread(frame,1,6,_fin);
832   if(ret<6)return 0;
833   if(memcmp(frame,"FRAME",5)){
834     fprintf(stderr,"Loss of framing in Y4M input data\n");
835     return -1;
836   }
837   if(frame[5]!='\n'){
838     char c;
839     int  j;
840     for(j=0;j<79&&fread(&c,1,1,_fin)&&c!='\n';j++);
841     if(j==79){
842       fprintf(stderr,"Error parsing Y4M frame header\n");
843       return -1;
844     }
845   }
846   /*Read the frame data that needs no conversion.*/
847   if(fread(_y4m->dst_buf,1,_y4m->dst_buf_read_sz,_fin)!=_y4m->dst_buf_read_sz){
848     fprintf(stderr,"Error reading Y4M frame data.\n");
849     return -1;
850   }
851   /*Read the frame data that does need conversion.*/
852   if(fread(_y4m->aux_buf,1,_y4m->aux_buf_read_sz,_fin)!=_y4m->aux_buf_read_sz){
853     fprintf(stderr,"Error reading Y4M frame data.\n");
854     return -1;
855   }
856   /*Now convert the just read frame.*/
857   (*_y4m->convert)(_y4m,_y4m->dst_buf,_y4m->aux_buf);
858   /*Fill in the frame buffer pointers.
859     We don't use vpx_img_wrap() because it forces padding for odd picture
860      sizes, which would require a separate fread call for every row.*/
861   memset(_img,0,sizeof(*_img));
862   /*Y4M has the planes in Y'CbCr order, which libvpx calls Y, U, and V.*/
863   _img->fmt=IMG_FMT_I420;
864   _img->w=_img->d_w=_y4m->pic_w;
865   _img->h=_img->d_h=_y4m->pic_h;
866   /*This is hard-coded to 4:2:0 for now, as that's all VP8 supports.*/
867   _img->x_chroma_shift=1;
868   _img->y_chroma_shift=1;
869   _img->bps=12;
870   /*Set up the buffer pointers.*/
871   pic_sz=_y4m->pic_w*_y4m->pic_h;
872   c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
873   c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
874   c_sz=c_w*c_h;
875   _img->stride[PLANE_Y]=_y4m->pic_w;
876   _img->stride[PLANE_U]=_img->stride[PLANE_V]=c_w;
877   _img->planes[PLANE_Y]=_y4m->dst_buf;
878   _img->planes[PLANE_U]=_y4m->dst_buf+pic_sz;
879   _img->planes[PLANE_V]=_y4m->dst_buf+pic_sz+c_sz;
880   return 0;
881 }