*/
DECLARE_ALIGNED(32, unsigned char, y_buf[22*32]);
#endif
-
-#if CONFIG_RUNTIME_CPU_DETECT
- struct VP8_COMMON_RTCD *rtcd;
-#endif
} MACROBLOCKD;
prototype int vp8_diamond_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"
vp8_diamond_search_sad_sse3=vp8_diamond_search_sadx4
+#
+# Alt-ref Noise Reduction (ARNR)
+#
+if [ "$CONFIG_REALTIME_ONLY" != "yes" ]; then
+ prototype void vp8_temporal_filter_apply "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count"
+ specialize vp8_temporal_filter_apply sse2
+fi
# End of encoder only functions
fi
}
}
-#if CONFIG_RUNTIME_CPU_DETECT
-#define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x)
-#else
-#define RTCD_VTABLE(x) NULL
-#endif
static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
unsigned int mb_idx)
extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
-#if CONFIG_RUNTIME_CPU_DETECT
-#define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x)
-#else
-#define RTCD_VTABLE(x) NULL
-#endif
-
static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
{
VP8_COMMON *const pc = & pbi->common;
for (i = 0; i < count; i++)
{
MACROBLOCKD *mbd = &mbrd[i].mbd;
-#if CONFIG_RUNTIME_CPU_DETECT
- mbd->rtcd = xd->rtcd;
-#endif
mbd->subpixel_predict = xd->subpixel_predict;
mbd->subpixel_predict8x4 = xd->subpixel_predict8x4;
mbd->subpixel_predict8x8 = xd->subpixel_predict8x8;
#include "vp8/common/invtrans.h"
#include "vpx_ports/vpx_timer.h"
-#if CONFIG_RUNTIME_CPU_DETECT
-#define RTCD(x) &cpi->common.rtcd.x
-#define IF_RTCD(x) (x)
-#else
-#define RTCD(x) NULL
-#define IF_RTCD(x) NULL
-#endif
extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
extern void vp8_calc_ref_frame_costs(int *ref_frame_cost,
int prob_intra,
}
if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED)
- vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
+ vp8_encode_intra4x4mby(x);
else
- vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
+ vp8_encode_intra16x16mby(x);
- vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
+ vp8_encode_intra16x16mbuv(x);
sum_intra_stats(cpi, x);
vp8_tokenize_mb(cpi, &x->e_mbd, t);
if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
{
- vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
+ vp8_encode_intra16x16mbuv(x);
if (xd->mode_info_context->mbmi.mode == B_PRED)
{
- vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
+ vp8_encode_intra4x4mby(x);
}
else
{
- vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
+ vp8_encode_intra16x16mby(x);
}
sum_intra_stats(cpi, x);
if (!x->skip)
{
- vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x);
+ vp8_encode_inter16x16(x);
// Clear mb_skip_coeff if mb_no_coeff_skip is not set
if (!cpi->common.mb_no_coeff_skip)
#include "encodeintra.h"
-#if CONFIG_RUNTIME_CPU_DETECT
-#define IF_RTCD(x) (x)
-#else
-#define IF_RTCD(x) NULL
-#endif
-
int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
{
if (use_dc_pred)
{
- const VP8_ENCODER_RTCD *rtcd = IF_RTCD(&cpi->rtcd);
-
x->e_mbd.mode_info_context->mbmi.mode = DC_PRED;
x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
- vp8_encode_intra16x16mby(rtcd, x);
+ vp8_encode_intra16x16mby(x);
vp8_inverse_transform_mby(&x->e_mbd);
}
for (i = 0; i < 16; i++)
{
x->e_mbd.block[i].bmi.as_mode = B_DC_PRED;
- vp8_encode_intra4x4block(IF_RTCD(&cpi->rtcd), x, i);
+ vp8_encode_intra4x4block(x, i);
}
}
return intra_pred_var;
}
-void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
- MACROBLOCK *x, int ib)
+void vp8_encode_intra4x4block(MACROBLOCK *x, int ib)
{
BLOCKD *b = &x->e_mbd.block[ib];
BLOCK *be = &x->block[ib];
}
}
-void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb)
+void vp8_encode_intra4x4mby(MACROBLOCK *mb)
{
int i;
vp8_intra_prediction_down_copy(x);
for (i = 0; i < 16; i++)
- vp8_encode_intra4x4block(rtcd, mb, i);
+ vp8_encode_intra4x4block(mb, i);
return;
}
-void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
+void vp8_encode_intra16x16mby(MACROBLOCK *x)
{
BLOCK *b = &x->block[0];
MACROBLOCKD *xd = &x->e_mbd;
vp8_quantize_mby(x);
if (x->optimize)
- vp8_optimize_mby(x, rtcd);
+ vp8_optimize_mby(x);
}
-void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
+void vp8_encode_intra16x16mbuv(MACROBLOCK *x)
{
MACROBLOCKD *xd = &x->e_mbd;
vp8_quantize_mbuv(x);
if (x->optimize)
- vp8_optimize_mbuv(x, rtcd);
+ vp8_optimize_mbuv(x);
}
#include "onyx_int.h"
int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred);
-void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *, MACROBLOCK *x);
-void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *, MACROBLOCK *x);
-void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *, MACROBLOCK *mb);
-void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
- MACROBLOCK *x, int ib);
+void vp8_encode_intra16x16mby(MACROBLOCK *x);
+void vp8_encode_intra16x16mbuv(MACROBLOCK *x);
+void vp8_encode_intra4x4mby(MACROBLOCK *mb);
+void vp8_encode_intra4x4block(MACROBLOCK *x, int ib);
#endif
#include "vpx_mem/vpx_mem.h"
#include "rdopt.h"
-#if CONFIG_RUNTIME_CPU_DETECT
-#define IF_RTCD(x) (x)
-#else
-#define IF_RTCD(x) NULL
-#endif
void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch)
{
unsigned char *src_ptr = (*(be->base_src) + be->src);
}
}
-static void vp8_subtract_mb(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
+static void vp8_subtract_mb(MACROBLOCK *x)
{
BLOCK *b = &x->block[0];
};
static void optimize_b(MACROBLOCK *mb, int ib, int type,
- ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
- const VP8_ENCODER_RTCD *rtcd)
+ ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
{
BLOCK *b;
BLOCKD *d;
}
}
-static void optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
+static void optimize_mb(MACROBLOCK *x)
{
int b;
int type;
for (b = 0; b < 16; b++)
{
optimize_b(x, b, type,
- ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
+ ta + vp8_block2above[b], tl + vp8_block2left[b]);
}
for (b = 16; b < 24; b++)
{
optimize_b(x, b, PLANE_TYPE_UV,
- ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
+ ta + vp8_block2above[b], tl + vp8_block2left[b]);
}
if (has_2nd_order)
{
b=24;
optimize_b(x, b, PLANE_TYPE_Y2,
- ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
+ ta + vp8_block2above[b], tl + vp8_block2left[b]);
check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2,
ta + vp8_block2above[b], tl + vp8_block2left[b]);
}
}
-void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
+void vp8_optimize_mby(MACROBLOCK *x)
{
int b;
int type;
for (b = 0; b < 16; b++)
{
optimize_b(x, b, type,
- ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
+ ta + vp8_block2above[b], tl + vp8_block2left[b]);
}
{
b=24;
optimize_b(x, b, PLANE_TYPE_Y2,
- ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
+ ta + vp8_block2above[b], tl + vp8_block2left[b]);
check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2,
ta + vp8_block2above[b], tl + vp8_block2left[b]);
}
}
-void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
+void vp8_optimize_mbuv(MACROBLOCK *x)
{
int b;
ENTROPY_CONTEXT_PLANES t_above, t_left;
for (b = 16; b < 24; b++)
{
optimize_b(x, b, PLANE_TYPE_UV,
- ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
+ ta + vp8_block2above[b], tl + vp8_block2left[b]);
}
}
-void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
+void vp8_encode_inter16x16(MACROBLOCK *x)
{
vp8_build_inter_predictors_mb(&x->e_mbd);
- vp8_subtract_mb(rtcd, x);
+ vp8_subtract_mb(x);
transform_mb(x);
vp8_quantize_mb(x);
if (x->optimize)
- optimize_mb(x, rtcd);
+ optimize_mb(x);
}
/* this function is used by first pass only */
-void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
+void vp8_encode_inter16x16y(MACROBLOCK *x)
{
BLOCK *b = &x->block[0];
#define __INC_ENCODEMB_H
#include "onyx_int.h"
-struct VP8_ENCODER_RTCD;
-void vp8_encode_inter16x16(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x);
+void vp8_encode_inter16x16(MACROBLOCK *x);
void vp8_build_dcblock(MACROBLOCK *b);
void vp8_transform_mb(MACROBLOCK *mb);
void vp8_transform_mbuv(MACROBLOCK *x);
void vp8_transform_intra_mby(MACROBLOCK *x);
-void vp8_optimize_mby(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd);
-void vp8_optimize_mbuv(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd);
-void vp8_encode_inter16x16y(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x);
+void vp8_optimize_mby(MACROBLOCK *x);
+void vp8_optimize_mbuv(MACROBLOCK *x);
+void vp8_encode_inter16x16y(MACROBLOCK *x);
#endif
mbd->subpixel_predict8x4 = xd->subpixel_predict8x4;
mbd->subpixel_predict8x8 = xd->subpixel_predict8x8;
mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
-#if CONFIG_RUNTIME_CPU_DETECT
- mbd->rtcd = xd->rtcd;
-#endif
mb->gf_active_ptr = x->gf_active_ptr;
vpx_memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts));
//#define OUTPUT_FPF 1
-#if CONFIG_RUNTIME_CPU_DETECT
-#define IF_RTCD(x) (x)
-#else
-#define IF_RTCD(x) NULL
-#endif
-
extern void vp8_build_block_offsets(MACROBLOCK *x);
extern void vp8_setup_block_ptrs(MACROBLOCK *x);
extern void vp8cx_frame_init_quantizer(VP8_COMP *cpi);
d->bmi.mv.as_mv.col <<= 3;
this_error = motion_error;
vp8_set_mbmode_and_mvs(x, NEWMV, &d->bmi.mv);
- vp8_encode_inter16x16y(IF_RTCD(&cpi->rtcd), x);
+ vp8_encode_inter16x16y(x);
sum_mvr += d->bmi.mv.as_mv.row;
sum_mvr_abs += abs(d->bmi.mv.as_mv.row);
sum_mvc += d->bmi.mv.as_mv.col;
void vp8_cmachine_specific_config(VP8_COMP *cpi)
{
-#if CONFIG_RUNTIME_CPU_DETECT
-#if !(CONFIG_REALTIME_ONLY)
- cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c;
-#endif
-#endif
-
// Pure C:
vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame;
#include "vp8/common/swapyv12buffer.h"
#include "vp8/common/threading.h"
#include "vpx_ports/vpx_timer.h"
-#include "temporal_filter.h"
#if ARCH_ARM
#include "vpx_ports/arm.h"
#endif
#include <stdio.h>
#include <limits.h>
-#if CONFIG_RUNTIME_CPU_DETECT
-#define IF_RTCD(x) (x)
-#define RTCD(x) &cpi->common.rtcd.x
-#else
-#define IF_RTCD(x) NULL
-#define RTCD(x) NULL
-#endif
-
extern void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi);
extern void vp8cx_set_alt_lf_level(VP8_COMP *cpi, int filt_val);
extern void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi);
#include "vpx_ports/mem.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "mcomp.h"
-#include "temporal_filter.h"
#include "vp8/common/findnearmv.h"
#include "lookahead.h"
void *ptr1;
} LPFTHREAD_DATA;
-
-typedef struct VP8_ENCODER_RTCD
-{
- vp8_temporal_rtcd_vtable_t temporal;
-} VP8_ENCODER_RTCD;
-
enum
{
BLOCK_16X8,
double est_max_qcorrection_factor;
} twopass;
-#if CONFIG_RUNTIME_CPU_DETECT
- VP8_ENCODER_RTCD rtcd;
-#endif
#if VP8_TEMPORAL_ALT_REF
YV12_BUFFER_CONFIG alt_ref_buffer;
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS];
#include "rdopt.h"
#include "vpx_mem/vpx_mem.h"
-#if CONFIG_RUNTIME_CPU_DETECT
-#define IF_RTCD(x) (x)
-#else
-#define IF_RTCD(x) NULL
-#endif
-
extern int VP8_UVSSE(MACROBLOCK *x);
#ifdef SPEEDSTATS
}
static int pick_intra4x4block(
- const VP8_ENCODER_RTCD *rtcd,
MACROBLOCK *x,
int ib,
B_PREDICTION_MODE *best_mode,
}
b->bmi.as_mode = (B_PREDICTION_MODE)(*best_mode);
- vp8_encode_intra4x4block(rtcd, x, ib);
+ vp8_encode_intra4x4block(x, ib);
return best_rd;
}
static int pick_intra4x4mby_modes
(
- const VP8_ENCODER_RTCD *rtcd,
MACROBLOCK *mb,
int *Rate,
int *best_dist
}
- pick_intra4x4block(rtcd, mb, i, &best_mode, bmode_costs, &r, &d);
+ pick_intra4x4block(mb, i, &best_mode, bmode_costs, &r, &d);
cost += r;
distortion += d;
case B_PRED:
/* Pass best so far to pick_intra4x4mby_modes to use as breakout */
distortion2 = best_sse;
- pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate, &distortion2);
+ pick_intra4x4mby_modes(x, &rate, &distortion2);
if (distortion2 == INT_MAX)
{
}
x->e_mbd.mode_info_context->mbmi.mode = best_mode;
- error4x4 = pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate,
+ error4x4 = pick_intra4x4mby_modes(x, &rate,
&best_sse);
if (error4x4 < error16x16)
{
extern int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest);
-#if CONFIG_RUNTIME_CPU_DETECT
-#define IF_RTCD(x) (x)
-#else
-#define IF_RTCD(x) NULL
-#endif
-
extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc,
YV12_BUFFER_CONFIG *dst_ybc);
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/systemdependent.h"
-#if CONFIG_RUNTIME_CPU_DETECT
-#define IF_RTCD(x) (x)
-#else
-#define IF_RTCD(x) NULL
-#endif
-
-
extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
#define MAXF(a,b) (((a) > (b)) ? (a) : (b))
predictor);
// Apply the filter (YUV)
- TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
+ vp8_temporal_filter_apply
(f->y_buffer + mb_y_offset,
f->y_stride,
predictor,
accumulator,
count);
- TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
+ vp8_temporal_filter_apply
(f->u_buffer + mb_uv_offset,
f->uv_stride,
predictor + 256,
accumulator + 256,
count + 256);
- TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
+ vp8_temporal_filter_apply
(f->v_buffer + mb_uv_offset,
f->uv_stride,
predictor + 320,
+++ /dev/null
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef __INC_VP8_TEMPORAL_FILTER_H
-#define __INC_VP8_TEMPORAL_FILTER_H
-
-#define prototype_apply(sym)\
- void (sym) \
- ( \
- unsigned char *frame1, \
- unsigned int stride, \
- unsigned char *frame2, \
- unsigned int block_size, \
- int strength, \
- int filter_weight, \
- unsigned int *accumulator, \
- unsigned short *count \
- )
-
-#if ARCH_X86 || ARCH_X86_64
-#include "x86/temporal_filter_x86.h"
-#endif
-
-#ifndef vp8_temporal_filter_apply
-#define vp8_temporal_filter_apply vp8_temporal_filter_apply_c
-#endif
-extern prototype_apply(vp8_temporal_filter_apply);
-
-typedef struct
-{
- prototype_apply(*apply);
-} vp8_temporal_rtcd_vtable_t;
-
-#if CONFIG_RUNTIME_CPU_DETECT
-#define TEMPORAL_INVOKE(ctx,fn) (ctx)->fn
-#else
-#define TEMPORAL_INVOKE(ctx,fn) vp8_temporal_filter_##fn
-#endif
-
-#endif // __INC_VP8_TEMPORAL_FILTER_H
+++ /dev/null
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef __INC_VP8_TEMPORAL_FILTER_X86_H
-#define __INC_VP8_TEMPORAL_FILTER_X86_H
-
-#if HAVE_SSE2
-extern prototype_apply(vp8_temporal_filter_apply_sse2);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-
-#undef vp8_temporal_filter_apply
-#define vp8_temporal_filter_apply vp8_temporal_filter_apply_sse2
-
-#endif
-
-#endif
-
-#endif // __INC_VP8_TEMPORAL_FILTER_X86_H
void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
{
-#if CONFIG_RUNTIME_CPU_DETECT
- int flags = x86_simd_caps();
-
- /* Note:
- *
- * This platform can be built without runtime CPU detection as well. If
- * you modify any of the function mappings present in this file, be sure
- * to also update them in static mapings (<arch>/filename_<arch>.h)
- */
-
- /* Override default functions with fastest ones for this CPU. */
-#if HAVE_SSE2
- if (flags & HAS_SSE2)
- {
-#if !(CONFIG_REALTIME_ONLY)
- cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2;
-#endif
-
- }
-#endif
-
-#endif
}
VP8_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.h
VP8_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.c
VP8_CX_SRCS-yes += encoder/temporal_filter.c
-VP8_CX_SRCS-yes += encoder/temporal_filter.h
VP8_CX_SRCS-$(CONFIG_MULTI_RES_ENCODING) += encoder/mr_dissim.c
VP8_CX_SRCS-$(CONFIG_MULTI_RES_ENCODING) += encoder/mr_dissim.h
VP8_CX_SRCS_REMOVE-yes += encoder/temporal_filter.c
endif
-VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/temporal_filter_x86.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/x86_csystemdependent.c
VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_mmx.c
VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_impl_mmx.asm