-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "memory.h"
-#include "preproc.h"
-#include "pragmas.h"
-
-/****************************************************************************
-* Macros
-****************************************************************************/
-#define FRAMECOUNT 7
-#define ROUNDUP32(X) ( ( ( (unsigned long) X ) + 31 )&( 0xFFFFFFE0 ) )
-
-/****************************************************************************
-* Imports
-****************************************************************************/
-extern void vpx_get_processor_flags(int *mmx_enabled, int *xmm_enabled, int *wmt_enabled);
-
-/****************************************************************************
-* Exported Global Variables
-****************************************************************************/
-void (*temp_filter)(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength);
-
-/****************************************************************************
- *
- * ROUTINE : temp_filter_wmt
- *
- * INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance.
- * unsigned char *s : Pointer to source frame.
- * unsigned char *d : Pointer to destination frame.
- * int bytes : Number of bytes to filter.
- * int strength : Strength of filter to apply.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Performs a closesness adjusted temporarl blur
- *
- * SPECIAL NOTES : Destination frame can be same as source frame.
- *
- ****************************************************************************/
-void temp_filter_wmt
-(
- pre_proc_instance *ppi,
- unsigned char *s,
- unsigned char *d,
- int bytes,
- int strength
-)
-{
- int byte = 0;
- unsigned char *frameptr = ppi->frame_buffer;
-
- __declspec(align(16)) unsigned short threes[] = { 3, 3, 3, 3, 3, 3, 3, 3};
- __declspec(align(16)) unsigned short sixteens[] = {16, 16, 16, 16, 16, 16, 16, 16};
-
- if (ppi->frame == 0)
- {
- do
- {
- int i;
- int frame = 0;
-
- do
- {
- for (i = 0; i < 8; i++)
- {
- *frameptr = s[byte+i];
- ++frameptr;
- }
-
- ++frame;
- }
- while (frame < FRAMECOUNT);
-
- for (i = 0; i < 8; i++)
- d[byte+i] = s[byte+i];
-
- byte += 8;
-
- }
- while (byte < bytes);
- }
- else
- {
- int i;
- int offset2 = (ppi->frame % FRAMECOUNT);
-
- do
- {
- __declspec(align(16)) unsigned short counts[8];
- __declspec(align(16)) unsigned short sums[8];
- __asm
- {
- mov eax, offset2
- mov edi, s // source pixels
- pxor xmm1, xmm1 // accumulator
-
- pxor xmm7, xmm7
-
- mov esi, frameptr // accumulator
- pxor xmm2, xmm2 // count
-
- movq xmm3, QWORD PTR [edi]
-
- movq QWORD PTR [esi+8*eax], xmm3
-
- punpcklbw xmm3, xmm2 // xmm3 source pixels
- mov ecx, FRAMECOUNT
-
- next_frame:
- movq xmm4, QWORD PTR [esi] // get frame buffer values
- punpcklbw xmm4, xmm7 // xmm4 frame buffer pixels
- movdqa xmm6, xmm4 // save the pixel values
- psubsw xmm4, xmm3 // subtracted pixel values
- pmullw xmm4, xmm4 // square xmm4
- movd xmm5, strength
- psrlw xmm4, xmm5 // should be strength
- pmullw xmm4, threes // 3 * modifier
- movdqa xmm5, sixteens // 16s
- psubusw xmm5, xmm4 // 16 - modifiers
- movdqa xmm4, xmm5 // save the modifiers
- pmullw xmm4, xmm6 // multiplier values
- paddusw xmm1, xmm4 // accumulator
- paddusw xmm2, xmm5 // count
- add esi, 8 // next frame
- dec ecx // next set of eight pixels
- jnz next_frame
-
- movdqa counts, xmm2
- psrlw xmm2, 1 // divide count by 2 for rounding
- paddusw xmm1, xmm2 // rounding added in
-
- mov frameptr, esi
-
- movdqa sums, xmm1
- }
-
- for (i = 0; i < 8; i++)
- {
- int blurvalue = sums[i] * ppi->fixed_divide[counts[i]];
- blurvalue >>= 16;
- d[i] = blurvalue;
- }
-
- s += 8;
- d += 8;
- byte += 8;
- }
- while (byte < bytes);
- }
-
- ++ppi->frame;
- __asm emms
-}
-
-/****************************************************************************
- *
- * ROUTINE : temp_filter_mmx
- *
- * INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance.
- * unsigned char *s : Pointer to source frame.
- * unsigned char *d : Pointer to destination frame.
- * int bytes : Number of bytes to filter.
- * int strength : Strength of filter to apply.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Performs a closesness adjusted temporarl blur
- *
- * SPECIAL NOTES : Destination frame can be same as source frame.
- *
- ****************************************************************************/
-void temp_filter_mmx
-(
- pre_proc_instance *ppi,
- unsigned char *s,
- unsigned char *d,
- int bytes,
- int strength
-)
-{
- int byte = 0;
- unsigned char *frameptr = ppi->frame_buffer;
-
- __declspec(align(16)) unsigned short threes[] = { 3, 3, 3, 3};
- __declspec(align(16)) unsigned short sixteens[] = {16, 16, 16, 16};
-
- if (ppi->frame == 0)
- {
- do
- {
- int i;
- int frame = 0;
-
- do
- {
- for (i = 0; i < 4; i++)
- {
- *frameptr = s[byte+i];
- ++frameptr;
- }
-
- ++frame;
- }
- while (frame < FRAMECOUNT);
-
- for (i = 0; i < 4; i++)
- d[byte+i] = s[byte+i];
-
- byte += 4;
-
- }
- while (byte < bytes);
- }
- else
- {
- int i;
- int offset2 = (ppi->frame % FRAMECOUNT);
-
- do
- {
- __declspec(align(16)) unsigned short counts[8];
- __declspec(align(16)) unsigned short sums[8];
- __asm
- {
-
- mov eax, offset2
- mov edi, s // source pixels
- pxor mm1, mm1 // accumulator
- pxor mm7, mm7
-
- mov esi, frameptr // accumulator
- pxor mm2, mm2 // count
-
- movd mm3, DWORD PTR [edi]
- movd DWORD PTR [esi+4*eax], mm3
-
- punpcklbw mm3, mm2 // mm3 source pixels
- mov ecx, FRAMECOUNT
-
- next_frame:
- movd mm4, DWORD PTR [esi] // get frame buffer values
- punpcklbw mm4, mm7 // mm4 frame buffer pixels
- movq mm6, mm4 // save the pixel values
- psubsw mm4, mm3 // subtracted pixel values
- pmullw mm4, mm4 // square mm4
- movd mm5, strength
- psrlw mm4, mm5 // should be strength
- pmullw mm4, threes // 3 * modifier
- movq mm5, sixteens // 16s
- psubusw mm5, mm4 // 16 - modifiers
- movq mm4, mm5 // save the modifiers
- pmullw mm4, mm6 // multiplier values
- paddusw mm1, mm4 // accumulator
- paddusw mm2, mm5 // count
- add esi, 4 // next frame
- dec ecx // next set of eight pixels
- jnz next_frame
-
- movq counts, mm2
- psrlw mm2, 1 // divide count by 2 for rounding
- paddusw mm1, mm2 // rounding added in
-
- mov frameptr, esi
-
- movq sums, mm1
-
- }
-
- for (i = 0; i < 4; i++)
- {
- int blurvalue = sums[i] * ppi->fixed_divide[counts[i]];
- blurvalue >>= 16;
- d[i] = blurvalue;
- }
-
- s += 4;
- d += 4;
- byte += 4;
- }
- while (byte < bytes);
- }
-
- ++ppi->frame;
- __asm emms
-}