From aeed28f143f248d4e7a3312c3b82672d1175385e Mon Sep 17 00:00:00 2001 From: Dmitry Kovalev Date: Tue, 9 Jul 2013 11:22:56 -0700 Subject: [PATCH] Removing vp9_maskingmv.c and corresponding assembly file. Change-Id: I9842d02d61d78d17dc3449bae8ffbe60f4b3ecb3 --- vp9/common/vp9_maskingmv.c | 803 --------------------------------------- vp9/common/x86/vp9_mask_sse3.asm | 484 ----------------------- vp9/vp9_common.mk | 6 - 3 files changed, 1293 deletions(-) delete mode 100644 vp9/common/vp9_maskingmv.c delete mode 100644 vp9/common/x86/vp9_mask_sse3.asm diff --git a/vp9/common/vp9_maskingmv.c b/vp9/common/vp9_maskingmv.c deleted file mode 100644 index 326201b..0000000 --- a/vp9/common/vp9_maskingmv.c +++ /dev/null @@ -1,803 +0,0 @@ -/* - ============================================================================ - Name : vp9_maskingmv.c - Author : jimbankoski - Version : - Copyright : Your copyright notice - Description : Hello World in C, Ansi-style - ============================================================================ - */ - -#include -#include -#include - -unsigned int vp9_sad16x16_sse3( - unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr, - int ref_stride, - int max_err); - -int vp8_growmaskmb_sse3( - unsigned char *om, - unsigned char *nm); - -void vp8_makemask_sse3( - unsigned char *y, - unsigned char *u, - unsigned char *v, - unsigned char *ym, - int yp, - int uvp, - int ys, - int us, - int vs, - int yt, - int ut, - int vt); - -unsigned int vp9_sad16x16_unmasked_wmt( - unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr, - int ref_stride, - unsigned char *mask); - -unsigned int vp9_sad16x16_masked_wmt( - unsigned char *src_ptr, - int src_stride, - unsigned char *ref_ptr, - int ref_stride, - unsigned char *mask); - -unsigned int vp8_masked_predictor_wmt( - unsigned char *masked, - unsigned char *unmasked, - int src_stride, - unsigned char *dst_ptr, - int dst_stride, - unsigned char *mask); -unsigned int vp8_masked_predictor_uv_wmt( - unsigned char *masked, - unsigned char *unmasked, - int src_stride, - unsigned char *dst_ptr, - int dst_stride, - unsigned char *mask); -unsigned int vp8_uv_from_y_mask( - unsigned char *ymask, - unsigned char *uvmask); -int yp = 16; -unsigned char sxy[] = { - 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, - 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, - 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, - 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, - 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, - 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, - 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, - 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, - 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, - 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, - 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, - 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, - 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, - 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, - 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, - 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90 -}; - -unsigned char sts[] = { - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -}; -unsigned char str[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 -}; - -unsigned char y[] = { - 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, - 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, - 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, - 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, - 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, - 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, - 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, - 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, - 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, - 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, - 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, - 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, - 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, - 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, - 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, - 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40 -}; -int uvp = 8; -unsigned char u[] = { - 90, 80, 70, 70, 90, 90, 90, 17, - 90, 80, 70, 70, 90, 90, 90, 17, - 84, 70, 70, 90, 90, 90, 17, 17, - 84, 70, 70, 90, 90, 90, 17, 17, - 80, 70, 70, 90, 90, 90, 17, 17, - 90, 80, 70, 70, 90, 90, 90, 17, - 90, 80, 70, 70, 90, 90, 90, 17, - 90, 80, 70, 70, 90, 90, 90, 17 -}; - -unsigned char v[] = { - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80 -}; - -unsigned char ym[256]; -unsigned char uvm[64]; -typedef struct { - unsigned char y; - unsigned char yt; - unsigned char u; - unsigned char ut; - unsigned char v; - unsigned char vt; - unsigned char use; -} COLOR_SEG_ELEMENT; - -/* -COLOR_SEG_ELEMENT segmentation[]= -{ - { 60,4,80,17,80,10, 1}, - { 40,4,15,10,80,10, 1}, -}; -*/ - -COLOR_SEG_ELEMENT segmentation[] = { - { 79, 44, 92, 44, 237, 60, 1}, -}; - -unsigned char pixel_mask(unsigned char y, unsigned char u, unsigned char v, - COLOR_SEG_ELEMENT sgm[], - int c) { - COLOR_SEG_ELEMENT *s = sgm; - unsigned char m = 0; - int i; - for (i = 0; i < c; i++, s++) - m |= (abs(y - s->y) < s->yt && - abs(u - s->u) < s->ut && - abs(v - s->v) < s->vt ? 255 : 0); - - return m; -} -int neighbors[256][8]; -int makeneighbors(void) { - int i, j; - for (i = 0; i < 256; i++) { - int r = (i >> 4), c = (i & 15); - int ni = 0; - for (j = 0; j < 8; j++) - neighbors[i][j] = i; - for (j = 0; j < 256; j++) { - int nr = (j >> 4), nc = (j & 15); - if (abs(nr - r) < 2 && abs(nc - c) < 2) - neighbors[i][ni++] = j; - } - } - return 0; -} -void grow_ymask(unsigned char *ym) { - unsigned char nym[256]; - int i, j; - - for (i = 0; i < 256; i++) { - nym[i] = ym[i]; - for (j = 0; j < 8; j++) { - nym[i] |= ym[neighbors[i][j]]; - } - } - for (i = 0; i < 256; i++) - ym[i] = nym[i]; -} - -void make_mb_mask(unsigned char *y, unsigned char *u, unsigned char *v, - unsigned char *ym, unsigned char *uvm, - int yp, int uvp, - COLOR_SEG_ELEMENT sgm[], - int count) { - int r, c; - unsigned char *oym = ym; - - memset(ym, 20, 256); - for (r = 0; r < 8; r++, uvm += 8, u += uvp, v += uvp, y += (yp + yp), ym += 32) - for (c = 0; c < 8; c++) { - int y1 = y[c << 1]; - int u1 = u[c]; - int v1 = v[c]; - int m = pixel_mask(y1, u1, v1, sgm, count); - uvm[c] = m; - ym[c << 1] = uvm[c]; // = pixel_mask(y[c<<1],u[c],v[c],sgm,count); - ym[(c << 1) + 1] = pixel_mask(y[1 + (c << 1)], u[c], v[c], sgm, count); - ym[(c << 1) + 16] = pixel_mask(y[yp + (c << 1)], u[c], v[c], sgm, count); - ym[(c << 1) + 17] = pixel_mask(y[1 + yp + (c << 1)], u[c], v[c], sgm, count); - } - grow_ymask(oym); -} - -int masked_sad(unsigned char *src, int p, unsigned char *dst, int dp, - unsigned char *ym) { - int i, j; - unsigned sad = 0; - for (i = 0; i < 16; i++, src += p, dst += dp, ym += 16) - for (j = 0; j < 16; j++) - if (ym[j]) - sad += abs(src[j] - dst[j]); - - return sad; -} - -int compare_masks(unsigned char *sym, unsigned char *ym) { - int i, j; - unsigned sad = 0; - for (i = 0; i < 16; i++, sym += 16, ym += 16) - for (j = 0; j < 16; j++) - sad += (sym[j] != ym[j] ? 1 : 0); - - return sad; -} - -int unmasked_sad(unsigned char *src, int p, unsigned char *dst, int dp, - unsigned char *ym) { - int i, j; - unsigned sad = 0; - for (i = 0; i < 16; i++, src += p, dst += dp, ym += 16) - for (j = 0; j < 16; j++) - if (!ym[j]) - sad += abs(src[j] - dst[j]); - - return sad; -} - -int masked_motion_search(unsigned char *y, unsigned char *u, unsigned char *v, - int yp, int uvp, - unsigned char *dy, unsigned char *du, unsigned char *dv, - int dyp, int duvp, - COLOR_SEG_ELEMENT sgm[], - int count, - int *mi, - int *mj, - int *ui, - int *uj, - int *wm) { - int i, j; - - unsigned char ym[256]; - unsigned char uvm[64]; - unsigned char dym[256]; - unsigned char duvm[64]; - unsigned int e = 0; - int beste = 256; - int bmi = -32, bmj = -32; - int bui = -32, buj = -32; - int beste1 = 256; - int bmi1 = -32, bmj1 = -32; - int bui1 = -32, buj1 = -32; - int obeste; - - // first try finding best mask and then unmasked - beste = 0xffffffff; - - // find best unmasked mv - for (i = -32; i < 32; i++) { - unsigned char *dyz = i * dyp + dy; - unsigned char *duz = i / 2 * duvp + du; - unsigned char *dvz = i / 2 * duvp + dv; - for (j = -32; j < 32; j++) { - // 0,0 masked destination - make_mb_mask(dyz + j, duz + j / 2, dvz + j / 2, dym, duvm, dyp, duvp, sgm, count); - - e = unmasked_sad(y, yp, dyz + j, dyp, dym); - - if (e < beste) { - bui = i; - buj = j; - beste = e; - } - } - } - // bui=0;buj=0; - // best mv masked destination - make_mb_mask(dy + bui * dyp + buj, du + bui / 2 * duvp + buj / 2, dv + bui / 2 * duvp + buj / 2, - dym, duvm, dyp, duvp, sgm, count); - - obeste = beste; - beste = 0xffffffff; - - // find best masked - for (i = -32; i < 32; i++) { - unsigned char *dyz = i * dyp + dy; - for (j = -32; j < 32; j++) { - e = masked_sad(y, yp, dyz + j, dyp, dym); - - if (e < beste) { - bmi = i; - bmj = j; - beste = e; - } - } - } - beste1 = beste + obeste; - bmi1 = bmi; - bmj1 = bmj; - bui1 = bui; - buj1 = buj; - - beste = 0xffffffff; - // source mask - make_mb_mask(y, u, v, ym, uvm, yp, uvp, sgm, count); - - // find best mask - for (i = -32; i < 32; i++) { - unsigned char *dyz = i * dyp + dy; - unsigned char *duz = i / 2 * duvp + du; - unsigned char *dvz = i / 2 * duvp + dv; - for (j = -32; j < 32; j++) { - // 0,0 masked destination - make_mb_mask(dyz + j, duz + j / 2, dvz + j / 2, dym, duvm, dyp, duvp, sgm, count); - - e = compare_masks(ym, dym); - - if (e < beste) { - bmi = i; - bmj = j; - beste = e; - } - } - } - - - // best mv masked destination - make_mb_mask(dy + bmi * dyp + bmj, du + bmi / 2 * duvp + bmj / 2, dv + bmi / 2 * duvp + bmj / 2, - dym, duvm, dyp, duvp, sgm, count); - - obeste = masked_sad(y, yp, dy + bmi * dyp + bmj, dyp, dym); - - beste = 0xffffffff; - - // find best unmasked mv - for (i = -32; i < 32; i++) { - unsigned char *dyz = i * dyp + dy; - for (j = -32; j < 32; j++) { - e = unmasked_sad(y, yp, dyz + j, dyp, dym); - - if (e < beste) { - bui = i; - buj = j; - beste = e; - } - } - } - beste += obeste; - - - if (beste < beste1) { - *mi = bmi; - *mj = bmj; - *ui = bui; - *uj = buj; - *wm = 1; - } else { - *mi = bmi1; - *mj = bmj1; - *ui = bui1; - *uj = buj1; - *wm = 0; - - } - return 0; -} - -int predict(unsigned char *src, int p, unsigned char *dst, int dp, - unsigned char *ym, unsigned char *prd) { - int i, j; - for (i = 0; i < 16; i++, src += p, dst += dp, ym += 16, prd += 16) - for (j = 0; j < 16; j++) - prd[j] = (ym[j] ? src[j] : dst[j]); - return 0; -} - -int fast_masked_motion_search(unsigned char *y, unsigned char *u, unsigned char *v, - int yp, int uvp, - unsigned char *dy, unsigned char *du, unsigned char *dv, - int dyp, int duvp, - COLOR_SEG_ELEMENT sgm[], - int count, - int *mi, - int *mj, - int *ui, - int *uj, - int *wm) { - int i, j; - - unsigned char ym[256]; - unsigned char ym2[256]; - unsigned char uvm[64]; - unsigned char dym2[256]; - unsigned char dym[256]; - unsigned char duvm[64]; - unsigned int e = 0; - int beste = 256; - int bmi = -32, bmj = -32; - int bui = -32, buj = -32; - int beste1 = 256; - int bmi1 = -32, bmj1 = -32; - int bui1 = -32, buj1 = -32; - int obeste; - - // first try finding best mask and then unmasked - beste = 0xffffffff; - -#if 0 - for (i = 0; i < 16; i++) { - unsigned char *dy = i * yp + y; - for (j = 0; j < 16; j++) - printf("%2x", dy[j]); - printf("\n"); - } - printf("\n"); - - for (i = -32; i < 48; i++) { - unsigned char *dyz = i * dyp + dy; - for (j = -32; j < 48; j++) - printf("%2x", dyz[j]); - printf("\n"); - } -#endif - - // find best unmasked mv - for (i = -32; i < 32; i++) { - unsigned char *dyz = i * dyp + dy; - unsigned char *duz = i / 2 * duvp + du; - unsigned char *dvz = i / 2 * duvp + dv; - for (j = -32; j < 32; j++) { - // 0,0 masked destination - vp8_makemask_sse3(dyz + j, duz + j / 2, dvz + j / 2, dym, dyp, duvp, - sgm[0].y, sgm[0].u, sgm[0].v, - sgm[0].yt, sgm[0].ut, sgm[0].vt); - - vp8_growmaskmb_sse3(dym, dym2); - - e = vp9_sad16x16_unmasked_wmt(y, yp, dyz + j, dyp, dym2); - - if (e < beste) { - bui = i; - buj = j; - beste = e; - } - } - } - // bui=0;buj=0; - // best mv masked destination - - vp8_makemask_sse3(dy + bui * dyp + buj, du + bui / 2 * duvp + buj / 2, dv + bui / 2 * duvp + buj / 2, - dym, dyp, duvp, - sgm[0].y, sgm[0].u, sgm[0].v, - sgm[0].yt, sgm[0].ut, sgm[0].vt); - - vp8_growmaskmb_sse3(dym, dym2); - - obeste = beste; - beste = 0xffffffff; - - // find best masked - for (i = -32; i < 32; i++) { - unsigned char *dyz = i * dyp + dy; - for (j = -32; j < 32; j++) { - e = vp9_sad16x16_masked_wmt(y, yp, dyz + j, dyp, dym2); - if (e < beste) { - bmi = i; - bmj = j; - beste = e; - } - } - } - beste1 = beste + obeste; - bmi1 = bmi; - bmj1 = bmj; - bui1 = bui; - buj1 = buj; - - // source mask - vp8_makemask_sse3(y, u, v, - ym, yp, uvp, - sgm[0].y, sgm[0].u, sgm[0].v, - sgm[0].yt, sgm[0].ut, sgm[0].vt); - - vp8_growmaskmb_sse3(ym, ym2); - - // find best mask - for (i = -32; i < 32; i++) { - unsigned char *dyz = i * dyp + dy; - unsigned char *duz = i / 2 * duvp + du; - unsigned char *dvz = i / 2 * duvp + dv; - for (j = -32; j < 32; j++) { - // 0,0 masked destination - vp8_makemask_sse3(dyz + j, duz + j / 2, dvz + j / 2, dym, dyp, duvp, - sgm[0].y, sgm[0].u, sgm[0].v, - sgm[0].yt, sgm[0].ut, sgm[0].vt); - - vp8_growmaskmb_sse3(dym, dym2); - - e = compare_masks(ym2, dym2); - - if (e < beste) { - bmi = i; - bmj = j; - beste = e; - } - } - } - - vp8_makemask_sse3(dy + bmi * dyp + bmj, du + bmi / 2 * duvp + bmj / 2, dv + bmi / 2 * duvp + bmj / 2, - dym, dyp, duvp, - sgm[0].y, sgm[0].u, sgm[0].v, - sgm[0].yt, sgm[0].ut, sgm[0].vt); - - vp8_growmaskmb_sse3(dym, dym2); - - obeste = vp9_sad16x16_masked_wmt(y, yp, dy + bmi * dyp + bmj, dyp, dym2); - - beste = 0xffffffff; - - // find best unmasked mv - for (i = -32; i < 32; i++) { - unsigned char *dyz = i * dyp + dy; - for (j = -32; j < 32; j++) { - e = vp9_sad16x16_unmasked_wmt(y, yp, dyz + j, dyp, dym2); - - if (e < beste) { - bui = i; - buj = j; - beste = e; - } - } - } - beste += obeste; - - if (beste < beste1) { - *mi = bmi; - *mj = bmj; - *ui = bui; - *uj = buj; - *wm = 1; - } else { - *mi = bmi1; - *mj = bmj1; - *ui = bui1; - *uj = buj1; - *wm = 0; - beste = beste1; - - } - return beste; -} - -int predict_all(unsigned char *ym, unsigned char *um, unsigned char *vm, - int ymp, int uvmp, - unsigned char *yp, unsigned char *up, unsigned char *vp, - int ypp, int uvpp, - COLOR_SEG_ELEMENT sgm[], - int count, - int mi, - int mj, - int ui, - int uj, - int wm) { - int i, j; - unsigned char dym[256]; - unsigned char dym2[256]; - unsigned char duvm[64]; - unsigned char *yu = ym, *uu = um, *vu = vm; - - unsigned char *dym3 = dym2; - - ym += mi * ymp + mj; - um += mi / 2 * uvmp + mj / 2; - vm += mi / 2 * uvmp + mj / 2; - - yu += ui * ymp + uj; - uu += ui / 2 * uvmp + uj / 2; - vu += ui / 2 * uvmp + uj / 2; - - // best mv masked destination - if (wm) - vp8_makemask_sse3(ym, um, vm, dym, ymp, uvmp, - sgm[0].y, sgm[0].u, sgm[0].v, - sgm[0].yt, sgm[0].ut, sgm[0].vt); - else - vp8_makemask_sse3(yu, uu, vu, dym, ymp, uvmp, - sgm[0].y, sgm[0].u, sgm[0].v, - sgm[0].yt, sgm[0].ut, sgm[0].vt); - - vp8_growmaskmb_sse3(dym, dym2); - vp8_masked_predictor_wmt(ym, yu, ymp, yp, ypp, dym3); - vp8_uv_from_y_mask(dym3, duvm); - vp8_masked_predictor_uv_wmt(um, uu, uvmp, up, uvpp, duvm); - vp8_masked_predictor_uv_wmt(vm, vu, uvmp, vp, uvpp, duvm); - - return 0; -} - -unsigned char f0p[1280 * 720 * 3 / 2]; -unsigned char f1p[1280 * 720 * 3 / 2]; -unsigned char prd[1280 * 720 * 3 / 2]; -unsigned char msk[1280 * 720 * 3 / 2]; - - -int mainz(int argc, char *argv[]) { - - FILE *f = fopen(argv[1], "rb"); - FILE *g = fopen(argv[2], "wb"); - int w = atoi(argv[3]), h = atoi(argv[4]); - int y_stride = w, uv_stride = w / 2; - int r, c; - unsigned char *f0 = f0p, *f1 = f1p, *t; - unsigned char ym[256], uvm[64]; - unsigned char ym2[256], uvm2[64]; - unsigned char ym3[256], uvm3[64]; - int a, b; - - COLOR_SEG_ELEMENT last = { 20, 20, 20, 20, 230, 20, 1}, best; -#if 0 - makeneighbors(); - COLOR_SEG_ELEMENT segmentation[] = { - { 60, 4, 80, 17, 80, 10, 1}, - { 40, 4, 15, 10, 80, 10, 1}, - }; - make_mb_mask(y, u, v, ym2, uvm2, 16, 8, segmentation, 1); - - vp8_makemask_sse3(y, u, v, ym, (int) 16, (int) 8, - (int) segmentation[0].y, (int) segmentation[0].u, (int) segmentation[0].v, - segmentation[0].yt, segmentation[0].ut, segmentation[0].vt); - - vp8_growmaskmb_sse3(ym, ym3); - - a = vp9_sad16x16_masked_wmt(str, 16, sts, 16, ym3); - b = vp9_sad16x16_unmasked_wmt(str, 16, sts, 16, ym3); - - vp8_masked_predictor_wmt(str, sts, 16, ym, 16, ym3); - - vp8_uv_from_y_mask(ym3, uvm3); - - return 4; -#endif - makeneighbors(); - - - memset(prd, 128, w * h * 3 / 2); - - fread(f0, w * h * 3 / 2, 1, f); - - while (!feof(f)) { - unsigned char *ys = f1, *yd = f0, *yp = prd; - unsigned char *us = f1 + w * h, *ud = f0 + w * h, *up = prd + w * h; - unsigned char *vs = f1 + w * h * 5 / 4, *vd = f0 + w * h * 5 / 4, *vp = prd + w * h * 5 / 4; - fread(f1, w * h * 3 / 2, 1, f); - - ys += 32 * y_stride; - yd += 32 * y_stride; - yp += 32 * y_stride; - us += 16 * uv_stride; - ud += 16 * uv_stride; - up += 16 * uv_stride; - vs += 16 * uv_stride; - vd += 16 * uv_stride; - vp += 16 * uv_stride; - for (r = 32; r < h - 32; r += 16, - ys += 16 * w, yd += 16 * w, yp += 16 * w, - us += 8 * uv_stride, ud += 8 * uv_stride, up += 8 * uv_stride, - vs += 8 * uv_stride, vd += 8 * uv_stride, vp += 8 * uv_stride) { - for (c = 32; c < w - 32; c += 16) { - int mi, mj, ui, uj, wm; - int bmi, bmj, bui, buj, bwm; - unsigned char ym[256]; - - if (vp9_sad16x16_sse3(ys + c, y_stride, yd + c, y_stride, 0xffff) == 0) - bmi = bmj = bui = buj = bwm = 0; - else { - COLOR_SEG_ELEMENT cs[5]; - int j; - unsigned int beste = 0xfffffff; - unsigned int bestj = 0; - - // try color from last mb segmentation - cs[0] = last; - - // try color segs from 4 pixels in mb recon as segmentation - cs[1].y = yd[c + y_stride + 1]; - cs[1].u = ud[c / 2 + uv_stride]; - cs[1].v = vd[c / 2 + uv_stride]; - cs[1].yt = cs[1].ut = cs[1].vt = 20; - cs[2].y = yd[c + w + 14]; - cs[2].u = ud[c / 2 + uv_stride + 7]; - cs[2].v = vd[c / 2 + uv_stride + 7]; - cs[2].yt = cs[2].ut = cs[2].vt = 20; - cs[3].y = yd[c + w * 14 + 1]; - cs[3].u = ud[c / 2 + uv_stride * 7]; - cs[3].v = vd[c / 2 + uv_stride * 7]; - cs[3].yt = cs[3].ut = cs[3].vt = 20; - cs[4].y = yd[c + w * 14 + 14]; - cs[4].u = ud[c / 2 + uv_stride * 7 + 7]; - cs[4].v = vd[c / 2 + uv_stride * 7 + 7]; - cs[4].yt = cs[4].ut = cs[4].vt = 20; - - for (j = 0; j < 5; j++) { - int e; - - e = fast_masked_motion_search( - ys + c, us + c / 2, vs + c / 2, y_stride, uv_stride, - yd + c, ud + c / 2, vd + c / 2, y_stride, uv_stride, - &cs[j], 1, &mi, &mj, &ui, &uj, &wm); - - if (e < beste) { - bmi = mi; - bmj = mj; - bui = ui; - buj = uj, bwm = wm; - bestj = j; - beste = e; - } - } - best = cs[bestj]; - // best = segmentation[0]; - last = best; - } - predict_all(yd + c, ud + c / 2, vd + c / 2, w, uv_stride, - yp + c, up + c / 2, vp + c / 2, w, uv_stride, - &best, 1, bmi, bmj, bui, buj, bwm); - - } - } - fwrite(prd, w * h * 3 / 2, 1, g); - t = f0; - f0 = f1; - f1 = t; - - } - fclose(f); - fclose(g); - return 0; -} diff --git a/vp9/common/x86/vp9_mask_sse3.asm b/vp9/common/x86/vp9_mask_sse3.asm deleted file mode 100644 index fe46823..0000000 --- a/vp9/common/x86/vp9_mask_sse3.asm +++ /dev/null @@ -1,484 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -;void int vp8_makemask_sse3( -; unsigned char *y, -; unsigned char *u, -; unsigned char *v, -; unsigned char *ym, -; unsigned char *uvm, -; int yp, -; int uvp, -; int ys, -; int us, -; int vs, -; int yt, -; int ut, -; int vt) -global sym(vp8_makemask_sse3) PRIVATE -sym(vp8_makemask_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 14 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;y - mov rdi, arg(1) ;u - mov rcx, arg(2) ;v - mov rax, arg(3) ;ym - movsxd rbx, dword arg(4) ;yp - movsxd rdx, dword arg(5) ;uvp - - pxor xmm0,xmm0 - - ;make 16 copies of the center y value - movd xmm1, arg(6) - pshufb xmm1, xmm0 - - ; make 16 copies of the center u value - movd xmm2, arg(7) - pshufb xmm2, xmm0 - - ; make 16 copies of the center v value - movd xmm3, arg(8) - pshufb xmm3, xmm0 - unpcklpd xmm2, xmm3 - - ;make 16 copies of the y tolerance - movd xmm3, arg(9) - pshufb xmm3, xmm0 - - ;make 16 copies of the u tolerance - movd xmm4, arg(10) - pshufb xmm4, xmm0 - - ;make 16 copies of the v tolerance - movd xmm5, arg(11) - pshufb xmm5, xmm0 - unpckhpd xmm4, xmm5 - - mov r8,8 - -NextPairOfRows: - - ;grab the y source values - movdqu xmm0, [rsi] - - ;compute abs difference between source and y target - movdqa xmm6, xmm1 - movdqa xmm7, xmm0 - psubusb xmm0, xmm1 - psubusb xmm6, xmm7 - por xmm0, xmm6 - - ;compute abs difference between - movdqa xmm6, xmm3 - pcmpgtb xmm6, xmm0 - - ;grab the y source values - add rsi, rbx - movdqu xmm0, [rsi] - - ;compute abs difference between source and y target - movdqa xmm11, xmm1 - movdqa xmm7, xmm0 - psubusb xmm0, xmm1 - psubusb xmm11, xmm7 - por xmm0, xmm11 - - ;compute abs difference between - movdqa xmm11, xmm3 - pcmpgtb xmm11, xmm0 - - - ;grab the u and v source values - movdqu xmm7, [rdi] - movdqu xmm8, [rcx] - unpcklpd xmm7, xmm8 - - ;compute abs difference between source and uv targets - movdqa xmm9, xmm2 - movdqa xmm10, xmm7 - psubusb xmm7, xmm2 - psubusb xmm9, xmm10 - por xmm7, xmm9 - - ;check whether the number is < tolerance - movdqa xmm0, xmm4 - pcmpgtb xmm0, xmm7 - - ;double u and v masks - movdqa xmm8, xmm0 - punpckhbw xmm0, xmm0 - punpcklbw xmm8, xmm8 - - ;mask row 0 and output - pand xmm6, xmm8 - pand xmm6, xmm0 - movdqa [rax],xmm6 - - ;mask row 1 and output - pand xmm11, xmm8 - pand xmm11, xmm0 - movdqa [rax+16],xmm11 - - - ; to the next row or set of rows - add rsi, rbx - add rdi, rdx - add rcx, rdx - add rax,32 - dec r8 - jnz NextPairOfRows - - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;GROW_HORIZ (register for result, source register or mem local) -; takes source and shifts left and ors with source -; then shifts right and ors with source -%macro GROW_HORIZ 2 - movdqa %1, %2 - movdqa xmm14, %1 - movdqa xmm15, %1 - pslldq xmm14, 1 - psrldq xmm15, 1 - por %1,xmm14 - por %1,xmm15 -%endmacro -;GROW_VERT (result, center row, above row, below row) -%macro GROW_VERT 4 - movdqa %1,%2 - por %1,%3 - por %1,%4 -%endmacro - -;GROW_NEXTLINE (new line to grow, new source, line to write) -%macro GROW_NEXTLINE 3 - GROW_HORIZ %1, %2 - GROW_VERT xmm3, xmm0, xmm1, xmm2 - movdqa %3,xmm3 -%endmacro - - -;void int vp8_growmaskmb_sse3( -; unsigned char *om, -; unsigned char *nm, -global sym(vp8_growmaskmb_sse3) PRIVATE -sym(vp8_growmaskmb_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src - mov rdi, arg(1) ;rst - - GROW_HORIZ xmm0, [rsi] - GROW_HORIZ xmm1, [rsi+16] - GROW_HORIZ xmm2, [rsi+32] - - GROW_VERT xmm3, xmm0, xmm1, xmm2 - por xmm0,xmm1 - movdqa [rdi], xmm0 - movdqa [rdi+16],xmm3 - - GROW_NEXTLINE xmm0,[rsi+48],[rdi+32] - GROW_NEXTLINE xmm1,[rsi+64],[rdi+48] - GROW_NEXTLINE xmm2,[rsi+80],[rdi+64] - GROW_NEXTLINE xmm0,[rsi+96],[rdi+80] - GROW_NEXTLINE xmm1,[rsi+112],[rdi+96] - GROW_NEXTLINE xmm2,[rsi+128],[rdi+112] - GROW_NEXTLINE xmm0,[rsi+144],[rdi+128] - GROW_NEXTLINE xmm1,[rsi+160],[rdi+144] - GROW_NEXTLINE xmm2,[rsi+176],[rdi+160] - GROW_NEXTLINE xmm0,[rsi+192],[rdi+176] - GROW_NEXTLINE xmm1,[rsi+208],[rdi+192] - GROW_NEXTLINE xmm2,[rsi+224],[rdi+208] - GROW_NEXTLINE xmm0,[rsi+240],[rdi+224] - - por xmm0,xmm2 - movdqa [rdi+240], xmm0 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - - -;unsigned int vp8_sad16x16_masked_wmt( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; unsigned char *mask) -global sym(vp8_sad16x16_masked_wmt) PRIVATE -sym(vp8_sad16x16_masked_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - mov rbx, arg(4) ;mask - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - mov rcx, 16 - - pxor xmm3, xmm3 - -NextSadRow: - movdqu xmm0, [rsi] - movdqu xmm1, [rdi] - movdqu xmm2, [rbx] - pand xmm0, xmm2 - pand xmm1, xmm2 - - psadbw xmm0, xmm1 - paddw xmm3, xmm0 - - add rsi, rax - add rdi, rdx - add rbx, 16 - - dec rcx - jnz NextSadRow - - movdqa xmm4 , xmm3 - psrldq xmm4, 8 - paddw xmm3, xmm4 - movq rax, xmm3 - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp8_sad16x16_unmasked_wmt( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; unsigned char *mask) -global sym(vp8_sad16x16_unmasked_wmt) PRIVATE -sym(vp8_sad16x16_unmasked_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - mov rbx, arg(4) ;mask - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - mov rcx, 16 - - pxor xmm3, xmm3 - -next_vp8_sad16x16_unmasked_wmt: - movdqu xmm0, [rsi] - movdqu xmm1, [rdi] - movdqu xmm2, [rbx] - por xmm0, xmm2 - por xmm1, xmm2 - - psadbw xmm0, xmm1 - paddw xmm3, xmm0 - - add rsi, rax - add rdi, rdx - add rbx, 16 - - dec rcx - jnz next_vp8_sad16x16_unmasked_wmt - - movdqa xmm4 , xmm3 - psrldq xmm4, 8 - paddw xmm3, xmm4 - movq rax, xmm3 - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp8_masked_predictor_wmt( -; unsigned char *masked, -; unsigned char *unmasked, -; int src_stride, -; unsigned char *dst_ptr, -; int dst_stride, -; unsigned char *mask) -global sym(vp8_masked_predictor_wmt) PRIVATE -sym(vp8_masked_predictor_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - mov rsi, arg(0) ;src_ptr - mov rdi, arg(1) ;ref_ptr - - mov rbx, arg(5) ;mask - movsxd rax, dword ptr arg(2) ;src_stride - mov r11, arg(3) ; destination - movsxd rdx, dword ptr arg(4) ;dst_stride - - mov rcx, 16 - - pxor xmm3, xmm3 - -next_vp8_masked_predictor_wmt: - movdqu xmm0, [rsi] - movdqu xmm1, [rdi] - movdqu xmm2, [rbx] - - pand xmm0, xmm2 - pandn xmm2, xmm1 - por xmm0, xmm2 - movdqu [r11], xmm0 - - add r11, rdx - add rsi, rax - add rdi, rdx - add rbx, 16 - - dec rcx - jnz next_vp8_masked_predictor_wmt - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;unsigned int vp8_masked_predictor_uv_wmt( -; unsigned char *masked, -; unsigned char *unmasked, -; int src_stride, -; unsigned char *dst_ptr, -; int dst_stride, -; unsigned char *mask) -global sym(vp8_masked_predictor_uv_wmt) PRIVATE -sym(vp8_masked_predictor_uv_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - mov rsi, arg(0) ;src_ptr - mov rdi, arg(1) ;ref_ptr - - mov rbx, arg(5) ;mask - movsxd rax, dword ptr arg(2) ;src_stride - mov r11, arg(3) ; destination - movsxd rdx, dword ptr arg(4) ;dst_stride - - mov rcx, 8 - - pxor xmm3, xmm3 - -next_vp8_masked_predictor_uv_wmt: - movq xmm0, [rsi] - movq xmm1, [rdi] - movq xmm2, [rbx] - - pand xmm0, xmm2 - pandn xmm2, xmm1 - por xmm0, xmm2 - movq [r11], xmm0 - - add r11, rdx - add rsi, rax - add rdi, rax - add rbx, 8 - - dec rcx - jnz next_vp8_masked_predictor_uv_wmt - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp8_uv_from_y_mask( -; unsigned char *ymask, -; unsigned char *uvmask) -global sym(vp8_uv_from_y_mask) PRIVATE -sym(vp8_uv_from_y_mask): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - mov rsi, arg(0) ;src_ptr - mov rdi, arg(1) ;dst_ptr - - - mov rcx, 8 - - pxor xmm3, xmm3 - -next_p8_uv_from_y_mask: - movdqu xmm0, [rsi] - pshufb xmm0, [shuf1b] ;[GLOBAL(shuf1b)] - movq [rdi],xmm0 - add rdi, 8 - add rsi,32 - - dec rcx - jnz next_p8_uv_from_y_mask - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -shuf1b: - db 0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0 - diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 587b1fc..ff1ae74 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -86,12 +86,6 @@ VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm endif -# common (c) -ifeq ($(CONFIG_CSM),yes) -VP9_COMMON_SRCS-yes += common/vp9_maskingmv.c -VP9_COMMON_SRCS-$(HAVE_SSE3) += common/x86/vp9_mask_sse3.asm -endif - VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_sadmxn_sse2.c -- 2.7.4