static const int qrounding_factors[129] =
{
- 56, 56, 56, 56, 48, 48, 56, 56,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
- 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48
};
static const int qzbin_factors[129] =
{
- 72, 72, 72, 72, 80, 80, 72, 72,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
- 80,
+ 80
};
static const int qrounding_factors_y2[129] =
{
- 56, 56, 56, 56, 48, 48, 56, 56,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
- 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48
};
static const int qzbin_factors_y2[129] =
{
- 72, 72, 72, 72, 80, 80, 72, 72,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
+ 84, 84, 84, 84, 84, 84, 84, 84,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
- 80, 80, 80, 80, 80, 80, 80, 80,
- 80,
+ 80
};
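/* Editorial note, not part of the patch: these tables are fixed-point
 * multipliers in units of 1/128 of the quantizer step, applied below as
 * ((factor * quant_val) + 64) >> 7 for the zero bin and
 * (factor * quant_val) >> 7 for the rounding offset.  A rounding factor
 * of 48 therefore places the rounding point at 48/128 = 0.375 of the
 * step, and raising the low-Q zbin factors from 80 to 84 widens the dead
 * zone from 0.625 to 0.65625 of the step.  A minimal sketch of that
 * reading (example values only):
 */
static int example_zbin(int factor, int quant_val)
{
    /* e.g. factor = 84, quant_val = 64 -> ((84 * 64) + 64) >> 7 == 42 */
    return ((factor * quant_val) + 64) >> 7;
}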
-//#define EXACT_QUANT
+#define EXACT_QUANT
#ifdef EXACT_QUANT
-static void vp8cx_invert_quant(short *quant, short *shift, short d)
+static void vp8cx_invert_quant(int improved_quant, short *quant,
+ short *shift, short d)
{
- unsigned t;
- int l;
- t = d;
- for(l = 0; t > 1; l++)
- t>>=1;
- t = 1 + (1<<(16+l))/d;
- *quant = (short)(t - (1<<16));
- *shift = l;
+ if(improved_quant)
+ {
+ unsigned t;
+ int l;
+ t = d;
+ for(l = 0; t > 1; l++)
+ t>>=1;
+ t = 1 + (1<<(16+l))/d;
+ *quant = (short)(t - (1<<16));
+ *shift = l;
+ }
+ else
+ {
+ *quant = (1 << 16) / d;
+ *shift = 0;
+ }
}
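/* Editorial sketch, not part of the patch: the improved path stores
 * quant = (1 + (1 << (16 + l)) / d) - (1 << 16) and shift = l, where
 * l = floor(log2(d)).  Assuming the quantizer consumes the pair as
 *   y = (((x * quant) >> 16) + x) >> shift
 * (the EXACT_QUANT application in VP8's regular quantizer), this
 * reproduces y = x / d exactly for in-range coefficients.  For d = 5:
 * l = 2, quant = 1 + (1 << 18) / 5 - (1 << 16) = -13107, shift = 2, and
 * x = 100 gives ((100 * -13107) >> 16) + 100 = -20 + 100 = 80, then
 * 80 >> 2 = 20, matching 100 / 5.  The non-improved path keeps the plain
 * 16-bit reciprocal with shift 0, trading exactness for simplicity.
 */
static int example_exact_divide(int x, short quant, short shift)
{
    /* assumes arithmetic right shift of negative values, as libvpx does;
     * e.g. example_exact_divide(100, -13107, 2) == 20 */
    return (((x * quant) >> 16) + x) >> shift;
}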
void vp8cx_init_quantizer(VP8_COMP *cpi)
{
// dc values
quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
- vp8cx_invert_quant(cpi->Y1quant[Q] + 0,
+ cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
+ vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
cpi->Y1quant_shift[Q] + 0, quant_val);
cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
- vp8cx_invert_quant(cpi->Y2quant[Q] + 0,
+ cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
+ vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
cpi->Y2quant_shift[Q] + 0, quant_val);
cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
- vp8cx_invert_quant(cpi->UVquant[Q] + 0,
+ cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
+ vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
cpi->UVquant_shift[Q] + 0, quant_val);
cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
int rc = vp8_default_zig_zag1d[i];
quant_val = vp8_ac_yquant(Q);
- vp8cx_invert_quant(cpi->Y1quant[Q] + rc,
+ cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
+ vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
cpi->Y1quant_shift[Q] + rc, quant_val);
cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
- vp8cx_invert_quant(cpi->Y2quant[Q] + rc,
+ cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
+ vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
cpi->Y2quant_shift[Q] + rc, quant_val);
cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
- vp8cx_invert_quant(cpi->UVquant[Q] + rc,
+ cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
+ vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
cpi->UVquant_shift[Q] + rc, quant_val);
cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
for (i = 0; i < 16; i++)
{
x->block[i].quant = cpi->Y1quant[QIndex];
+ x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
x->block[i].zbin = cpi->Y1zbin[QIndex];
x->block[i].round = cpi->Y1round[QIndex];
for (i = 16; i < 24; i++)
{
x->block[i].quant = cpi->UVquant[QIndex];
+ x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
x->block[i].zbin = cpi->UVzbin[QIndex];
x->block[i].round = cpi->UVround[QIndex];
// Y2
zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
+ x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
x->block[24].quant = cpi->Y2quant[QIndex];
x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
x->block[24].zbin = cpi->Y2zbin[QIndex];
x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
x->block[24].zbin_extra = (short)zbin_extra;
+
+ /* save this macroblock QIndex for vp8_update_zbin_extra() */
+ x->q_index = QIndex;
+}
+void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x)
+{
+ int i;
+ int QIndex = x->q_index;
+ int zbin_extra;
+
+ // Y
+ zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
+ for (i = 0; i < 16; i++)
+ {
+ x->block[i].zbin_extra = (short)zbin_extra;
+ }
+
+ // UV
+ zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
+ for (i = 16; i < 24; i++)
+ {
+ x->block[i].zbin_extra = (short)zbin_extra;
+ }
+
+ // Y2
+ zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
+ x->block[24].zbin_extra = (short)zbin_extra;
}
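/* Editorial sketch, not part of the patch: zbin_extra is the same 1/128
 * fixed-point scheme applied to the AC dequant step, so a per-MB change
 * to zbin_mode_boost only needs these 25 zbin_extra values (16 Y, 8 UV,
 * 1 Y2) refreshed rather than a full vp8cx_mb_init_quantizer() pass.
 * The numbers below are hypothetical, for illustration only:
 */
static short example_zbin_extra(short ac_dequant, int boost)
{
    /* e.g. ac_dequant = 80, boost = 16 -> (80 * 16) >> 7 == 10 */
    return (short)((ac_dequant * boost) >> 7);
}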
void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
{
+ // Clear Zbin mode boost for default case
+ cpi->zbin_mode_boost = 0;
+
// vp8cx_init_quantizer() is first called in vp8_create_compressor(). A check is added here so that vp8cx_init_quantizer() is only called
// when these values are not all zero.
if (cpi->common.y1dc_delta_q | cpi->common.y2dc_delta_q | cpi->common.uvdc_delta_q | cpi->common.y2ac_delta_q | cpi->common.uvac_delta_q)
}
+/* activity_avg must be positive, or flat regions could get a zero weight
+ * (infinite lambda), which confounds analysis.
+ * This also avoids the need for divide by zero checks in
+ * vp8_activity_masking().
+ */
+#define VP8_ACTIVITY_AVG_MIN (64)
+
+/* This is used as a reference when computing the source variance for the
+ * purposes of activity masking.
+ * Eventually this should be replaced by custom no-reference routines,
+ * which will be faster.
+ */
+static const unsigned char VP8_VAR_OFFS[16]=
+{
+ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
+};
+
+unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
+{
+ unsigned int act;
+ unsigned int sse;
+ int sum;
+ unsigned int a;
+ unsigned int b;
+ /* TODO: This could also be done over smaller areas (8x8), but that would
+ * require extensive changes elsewhere, as lambda is assumed to be fixed
+ * over an entire MB in most of the code.
+ * Another option is to compute four 8x8 variances, and pick a single
+ * lambda using a non-linear combination (e.g., the smallest, or second
+ * smallest, etc.).
+ */
+ VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
+ x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
+ /* This requires a full 32 bits of precision. */
+ act = (sse<<8) - sum*sum;
+ /* Drop 4 bits to give us some headroom to work with. */
+ act = (act + 8) >> 4;
+ /* If the region is flat, lower the activity some more. */
+ if (act < 8<<12)
+ act = act < 5<<12 ? act : 5<<12;
+ /* TODO: For non-flat regions, edge regions should receive less masking
+ * than textured regions, but identifying edge regions quickly and
+ * reliably enough is still a subject of experimentation.
+ * This will be most noticeable near edges with a complex shape (e.g.,
+ * text), but the 4x4 transform size should make this less of a problem
+ * than it would be for an 8x8 transform.
+ */
+ /* Apply the masking to the RD multiplier. */
+ a = act + 4*cpi->activity_avg;
+ b = 4*act + cpi->activity_avg;
+ x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
+ return act;
+}
+
+
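/* Editorial sketch, not part of the patch: two properties of
 * vp8_activity_masking() worth noting.  First, for the 256 pixels of a
 * 16x16 MB measured against the flat VP8_VAR_OFFS reference,
 *   (sse << 8) - sum * sum == 256 * sum(d*d) - (sum(d))^2
 *                          == 65536 * variance,
 * so after the (act + 8) >> 4 step, act is the MB pixel variance in Q12
 * and the flat-region clamp at 5 << 12 corresponds to a variance of 5.
 * Second, the blend (4*act + avg) / (act + 4*avg) is bounded: it is 1
 * when act == activity_avg, falls toward 1/4 for flat MBs (lower lambda,
 * relatively more bits spent there), and rises toward 4 for very busy
 * MBs.  A minimal demonstration of the blend, using INT64 as the
 * surrounding code does:
 */
static unsigned int example_masked_rdmult(unsigned int rdmult,
                                          unsigned int act,
                                          unsigned int avg)
{
    unsigned int a = act + 4 * avg;   /* denominator */
    unsigned int b = 4 * act + avg;   /* numerator */
    /* act == avg -> rdmult unchanged; act == 0 -> rdmult / 4;
     * act >> avg -> result approaches 4 * rdmult */
    return (unsigned int)(((INT64)rdmult * b + (a >> 1)) / a);
}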
static
void encode_mb_row(VP8_COMP *cpi,
int *segment_counts,
int *totalrate)
{
+ INT64 activity_sum = 0;
int i;
int recon_yoffset, recon_uvoffset;
int mb_col;
// Set up limit values for vertical motion vector components
// to prevent them extending beyond the UMV borders
x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
- x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
+ x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
+ (VP8BORDERINPIXELS - 16);
// for each macroblock col in image
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
- // Distance of Mb to the left & right edges, specified in
- // 1/8th pel units as they are always compared to values
+ // Distance of Mb to the left & right edges, specified in
+ // 1/8th pel units as they are always compared to values
// that are in 1/8th pel units
xd->mb_to_left_edge = -((mb_col * 16) << 3);
xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
// Set up limit values for horizontal motion vector components
// to prevent them extending beyond the UMV borders
x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
- x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
+ x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
+ (VP8BORDERINPIXELS - 16);
xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
xd->left_available = (mb_col != 0);
+ x->rddiv = cpi->RDDIV;
+ x->rdmult = cpi->RDMULT;
+
+ if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
+ activity_sum += vp8_activity_masking(cpi, x);
+
// Is segmentation enabled
// MB level adjustment to quantizer
if (xd->segmentation_enabled)
// this is to account for the border
xd->mode_info_context++;
x->partition_info++;
+ x->activity_sum += activity_sum;
}
VP8_COMMON *const cm = & cpi->common;
MACROBLOCKD *const xd = & x->e_mbd;
- int i;
TOKENEXTRA *tp = cpi->tok;
int segment_counts[MAX_MB_SEGMENTS];
int totalrate;
}
vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
- //vp8_initialize_rd_consts( cpi, vp8_dc_quant(cpi->avg_frame_qindex, cm->y1dc_delta_q) );
vp8cx_initialize_me_consts(cpi, cm->base_qindex);
- //vp8cx_initialize_me_consts( cpi, cpi->avg_frame_qindex);
// Copy data over into macroblock data structures.
vp8_setup_block_ptrs(x);
- x->rddiv = cpi->RDDIV;
- x->rdmult = cpi->RDMULT;
-
-#if 0
- // Experimental rd code
- // 2 Pass - Possibly set Rdmult based on last frame distortion + this frame target bits or other metrics
- // such as cpi->rate_correction_factor that indicate relative complexity.
- /*if ( cpi->pass == 2 && (cpi->last_frame_distortion > 0) && (cpi->target_bits_per_mb > 0) )
- {
- //x->rdmult = ((cpi->last_frame_distortion * 256)/cpi->common.MBs)/ cpi->target_bits_per_mb;
- x->rdmult = (int)(cpi->RDMULT * cpi->rate_correction_factor);
- }
- else
- x->rdmult = cpi->RDMULT; */
- //x->rdmult = (int)(cpi->RDMULT * pow( (cpi->rate_correction_factor * 2.0), 0.75 ));
-#endif
+ x->activity_sum = 0;
xd->mode_info_context->mbmi.mode = DC_PRED;
xd->mode_info_context->mbmi.uv_mode = DC_PRED;
else
{
#if CONFIG_MULTITHREAD
+ int i;
+
vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count);
for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
{
- int i;
cpi->current_mb_col_main = -1;
for (i = 0; i < cpi->encoding_thread_count; i++)
totalrate += cpi->mb_row_ei[i].totalrate;
}
+ for (i = 0; i < cpi->encoding_thread_count; i++)
+ {
+ x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
+ }
+
#endif
}
cpi->last_frame_distortion = cpi->frame_distortion;
#endif
+ /* Update the average activity for the next frame.
+ * This is feed-forward for now; it could also be saved in two-pass, or
+ * done during lookahead when that is eventually added.
+ */
+ cpi->activity_avg = (unsigned int)(x->activity_sum / cpi->common.MBs);
+ if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
+ cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
+
}
void vp8_setup_block_ptrs(MACROBLOCK *x)
{
if (cpi->sf.RD)
{
+ int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;
+
+ /* Are we using the fast quantizer for the mode selection? */
+ if(cpi->sf.use_fastquant_for_pick)
+ {
+ cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
+
+ /* the fast quantizer does not use zbin_extra, so there is
+ * no need to recalculate it */
+ cpi->zbin_mode_boost_enabled = 0;
+ }
inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
+
+ /* switch back to the regular quantizer for the encode */
+ if (cpi->sf.improved_quant)
+ {
+ cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
+ }
+
+ /* restore cpi->zbin_mode_boost_enabled */
+ cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
+
}
else
#endif
#endif
// MB level adjustment to quantizer setup
- if (xd->segmentation_enabled || cpi->zbin_mode_boost_enabled)
+ if (xd->segmentation_enabled)
{
// If cyclic update enabled
if (cpi->cyclic_refresh_mode_enabled)
((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
{
xd->mode_info_context->mbmi.segment_id = 0;
+
+ /* segment_id changed, so update */
+ vp8cx_mb_init_quantizer(cpi, x);
}
}
+ }
+ {
// Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to suppress noise
if (cpi->zbin_mode_boost_enabled)
{
- if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME))
- cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
+ if ( xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME )
+ cpi->zbin_mode_boost = 0;
else
- cpi->zbin_mode_boost = 0;
+ {
+ if (xd->mode_info_context->mbmi.mode == ZEROMV)
+ {
+ if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
+ cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
+ else
+ cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
+ }
+ else if (xd->mode_info_context->mbmi.mode == SPLITMV)
+ cpi->zbin_mode_boost = 0;
+ else
+ cpi->zbin_mode_boost = MV_ZBIN_BOOST;
+ }
}
+ else
+ cpi->zbin_mode_boost = 0;
- vp8cx_mb_init_quantizer(cpi, x);
+ vp8_update_zbin_extra(cpi, x);
}
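/* Editorial summary, not part of the patch: the boost selection above
 * collapses to a small ladder keyed on the chosen mode,
 *   INTRA_FRAME ref          -> 0
 *   ZEROMV with GF/ARF ref   -> GF_ZEROMV_ZBIN_BOOST
 *   ZEROMV with LAST ref     -> LF_ZEROMV_ZBIN_BOOST
 *   SPLITMV                  -> 0
 *   any other inter mode     -> MV_ZBIN_BOOST
 * after which vp8_update_zbin_extra() folds the boost into the per-block
 * zbin_extra values without a full quantizer re-init.
 */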
cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;