diff options
author | Vignesh Venkatasubramanian <vigneshv@google.com> | 2014-03-26 15:13:32 -0700 |
---|---|---|
committer | Vignesh Venkatasubramanian <vigneshv@google.com> | 2014-03-26 15:13:32 -0700 |
commit | 2ec72e65689c948e92b826ae1e867bf369e72f13 (patch) | |
tree | 09d08252ba727c6c2e090222ea15ccc3b143301a /libvpx/vp9/encoder/vp9_rdopt.c | |
parent | 9b35249446b07f40ac5fcc3205f2c048616efacc (diff) | |
download | android_external_libvpx-2ec72e65689c948e92b826ae1e867bf369e72f13.tar.gz android_external_libvpx-2ec72e65689c948e92b826ae1e867bf369e72f13.tar.bz2 android_external_libvpx-2ec72e65689c948e92b826ae1e867bf369e72f13.zip |
libvpx: Roll latest libvpx
Pulling changes related to fixing a rare bitstream issue on video upscaling.
Also enables VP9 encoder. This is exactly the same as the roll that is
already in master.
Upstream Hash: 8e9c9f118cda45013f14cce7961dcc8df78ffebf
Change-Id: Iba9bb1c1804462014deab20fa6f4c5e4c55a778e
Diffstat (limited to 'libvpx/vp9/encoder/vp9_rdopt.c')
-rw-r--r-- | libvpx/vp9/encoder/vp9_rdopt.c | 2952 |
1 files changed, 1437 insertions, 1515 deletions
diff --git a/libvpx/vp9/encoder/vp9_rdopt.c b/libvpx/vp9/encoder/vp9_rdopt.c index 78cb06b..2fd25ef 100644 --- a/libvpx/vp9/encoder/vp9_rdopt.c +++ b/libvpx/vp9/encoder/vp9_rdopt.c @@ -8,39 +8,43 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include <stdio.h> -#include <math.h> -#include <limits.h> #include <assert.h> +#include <limits.h> +#include <math.h> +#include <stdio.h> -#include "vp9/common/vp9_pragmas.h" -#include "vp9/encoder/vp9_tokenize.h" -#include "vp9/encoder/vp9_treewriter.h" -#include "vp9/encoder/vp9_onyx_int.h" -#include "vp9/encoder/vp9_modecosts.h" -#include "vp9/encoder/vp9_encodeintra.h" +#include "./vp9_rtcd.h" + +#include "vpx_mem/vpx_mem.h" + +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_idct.h" +#include "vp9/common/vp9_mvref_common.h" +#include "vp9/common/vp9_pragmas.h" +#include "vp9/common/vp9_pred_common.h" +#include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" -#include "vp9/common/vp9_findnearmv.h" -#include "vp9/common/vp9_quant_common.h" +#include "vp9/common/vp9_seg_common.h" +#include "vp9/common/vp9_systemdependent.h" + +#include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_encodemb.h" -#include "vp9/encoder/vp9_quantize.h" -#include "vp9/encoder/vp9_variance.h" +#include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_mcomp.h" -#include "vp9/encoder/vp9_rdopt.h" +#include "vp9/encoder/vp9_onyx_int.h" +#include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" -#include "vpx_mem/vpx_mem.h" -#include "vp9/common/vp9_systemdependent.h" -#include "vp9/encoder/vp9_encodemv.h" -#include "vp9/common/vp9_seg_common.h" -#include "vp9/common/vp9_pred_common.h" -#include "vp9/common/vp9_entropy.h" -#include "./vp9_rtcd.h" -#include "vp9/common/vp9_mvref_common.h" -#include "vp9/common/vp9_common.h" +#include "vp9/encoder/vp9_rdopt.h" +#include "vp9/encoder/vp9_tokenize.h" +#include "vp9/encoder/vp9_variance.h" -#define INVALID_MV 0x80008000 +#define RD_THRESH_MAX_FACT 64 +#define RD_THRESH_INC 1 +#define RD_THRESH_POW 1.25 +#define RD_MULT_EPB_RATIO 64 /* Factor to weigh the rate for switchable interp filters */ #define SWITCHABLE_INTERP_RATE_FACTOR 1 @@ -51,53 +55,79 @@ #define MIN_EARLY_TERM_INDEX 3 +typedef struct { + MB_PREDICTION_MODE mode; + MV_REFERENCE_FRAME ref_frame[2]; +} MODE_DEFINITION; + +typedef struct { + MV_REFERENCE_FRAME ref_frame[2]; +} REF_DEFINITION; + +struct rdcost_block_args { + MACROBLOCK *x; + ENTROPY_CONTEXT t_above[16]; + ENTROPY_CONTEXT t_left[16]; + int rate; + int64_t dist; + int64_t sse; + int this_rate; + int64_t this_dist; + int64_t this_sse; + int64_t this_rd; + int64_t best_rd; + int skip; + int use_fast_coef_costing; + const scan_order *so; +}; + const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { - {NEARESTMV, LAST_FRAME, NONE}, - {NEARESTMV, ALTREF_FRAME, NONE}, - {NEARESTMV, GOLDEN_FRAME, NONE}, - - {DC_PRED, INTRA_FRAME, NONE}, - - {NEWMV, LAST_FRAME, NONE}, - {NEWMV, ALTREF_FRAME, NONE}, - {NEWMV, GOLDEN_FRAME, NONE}, - - {NEARMV, LAST_FRAME, NONE}, - {NEARMV, ALTREF_FRAME, NONE}, - {NEARESTMV, LAST_FRAME, ALTREF_FRAME}, - {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME}, - - {TM_PRED, INTRA_FRAME, NONE}, - - {NEARMV, LAST_FRAME, ALTREF_FRAME}, - {NEWMV, LAST_FRAME, ALTREF_FRAME}, - {NEARMV, GOLDEN_FRAME, NONE}, - {NEARMV, GOLDEN_FRAME, ALTREF_FRAME}, - {NEWMV, GOLDEN_FRAME, ALTREF_FRAME}, - - {ZEROMV, LAST_FRAME, NONE}, - {ZEROMV, GOLDEN_FRAME, NONE}, - {ZEROMV, ALTREF_FRAME, NONE}, - {ZEROMV, LAST_FRAME, ALTREF_FRAME}, - {ZEROMV, GOLDEN_FRAME, ALTREF_FRAME}, - - {H_PRED, INTRA_FRAME, NONE}, - {V_PRED, INTRA_FRAME, NONE}, - {D135_PRED, INTRA_FRAME, NONE}, - {D207_PRED, INTRA_FRAME, NONE}, - {D153_PRED, INTRA_FRAME, NONE}, - {D63_PRED, INTRA_FRAME, NONE}, - {D117_PRED, INTRA_FRAME, NONE}, - {D45_PRED, INTRA_FRAME, NONE}, + {NEARESTMV, {LAST_FRAME, NONE}}, + {NEARESTMV, {ALTREF_FRAME, NONE}}, + {NEARESTMV, {GOLDEN_FRAME, NONE}}, + + {DC_PRED, {INTRA_FRAME, NONE}}, + + {NEWMV, {LAST_FRAME, NONE}}, + {NEWMV, {ALTREF_FRAME, NONE}}, + {NEWMV, {GOLDEN_FRAME, NONE}}, + + {NEARMV, {LAST_FRAME, NONE}}, + {NEARMV, {ALTREF_FRAME, NONE}}, + {NEARESTMV, {LAST_FRAME, ALTREF_FRAME}}, + {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}}, + + {TM_PRED, {INTRA_FRAME, NONE}}, + + {NEARMV, {LAST_FRAME, ALTREF_FRAME}}, + {NEWMV, {LAST_FRAME, ALTREF_FRAME}}, + {NEARMV, {GOLDEN_FRAME, NONE}}, + {NEARMV, {GOLDEN_FRAME, ALTREF_FRAME}}, + {NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}}, + + {ZEROMV, {LAST_FRAME, NONE}}, + {ZEROMV, {GOLDEN_FRAME, NONE}}, + {ZEROMV, {ALTREF_FRAME, NONE}}, + {ZEROMV, {LAST_FRAME, ALTREF_FRAME}}, + {ZEROMV, {GOLDEN_FRAME, ALTREF_FRAME}}, + + {H_PRED, {INTRA_FRAME, NONE}}, + {V_PRED, {INTRA_FRAME, NONE}}, + {D135_PRED, {INTRA_FRAME, NONE}}, + {D207_PRED, {INTRA_FRAME, NONE}}, + {D153_PRED, {INTRA_FRAME, NONE}}, + {D63_PRED, {INTRA_FRAME, NONE}}, + {D117_PRED, {INTRA_FRAME, NONE}}, + {D45_PRED, {INTRA_FRAME, NONE}}, }; const REF_DEFINITION vp9_ref_order[MAX_REFS] = { - {LAST_FRAME, NONE}, - {GOLDEN_FRAME, NONE}, - {ALTREF_FRAME, NONE}, - {LAST_FRAME, ALTREF_FRAME}, - {GOLDEN_FRAME, ALTREF_FRAME}, - {INTRA_FRAME, NONE}, + {{LAST_FRAME, NONE}}, + {{GOLDEN_FRAME, NONE}}, + {{ALTREF_FRAME, NONE}}, + {{LAST_FRAME, ALTREF_FRAME}}, + {{GOLDEN_FRAME, ALTREF_FRAME}}, + {{INTRA_FRAME, NONE}}, }; // The baseline rd thresholds for breaking out of the rd loop for @@ -107,31 +137,58 @@ const REF_DEFINITION vp9_ref_order[MAX_REFS] = { static int rd_thresh_block_size_factor[BLOCK_SIZES] = {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32}; -#define RD_THRESH_MAX_FACT 64 -#define RD_THRESH_INC 1 -#define RD_THRESH_POW 1.25 -#define RD_MULT_EPB_RATIO 64 +static int raster_block_offset(BLOCK_SIZE plane_bsize, + int raster_block, int stride) { + const int bw = b_width_log2(plane_bsize); + const int y = 4 * (raster_block >> bw); + const int x = 4 * (raster_block & ((1 << bw) - 1)); + return y * stride + x; +} +static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize, + int raster_block, int16_t *base) { + const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; + return base + raster_block_offset(plane_bsize, raster_block, stride); +} -#define MV_COST_WEIGHT 108 -#define MV_COST_WEIGHT_SUB 120 +static void fill_mode_costs(VP9_COMP *cpi) { + MACROBLOCK *const x = &cpi->mb; + const FRAME_CONTEXT *const fc = &cpi->common.fc; + int i, j; + + for (i = 0; i < INTRA_MODES; i++) + for (j = 0; j < INTRA_MODES; j++) + vp9_cost_tokens((int *)x->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j], + vp9_intra_mode_tree); + + // TODO(rbultje) separate tables for superblock costing? + vp9_cost_tokens(x->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree); + vp9_cost_tokens(x->intra_uv_mode_cost[KEY_FRAME], + vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree); + vp9_cost_tokens(x->intra_uv_mode_cost[INTER_FRAME], + fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree); + + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) + vp9_cost_tokens((int *)x->switchable_interp_costs[i], + fc->switchable_interp_prob[i], vp9_switchable_interp_tree); +} static void fill_token_costs(vp9_coeff_cost *c, - vp9_coeff_probs_model (*p)[BLOCK_TYPES]) { + vp9_coeff_probs_model (*p)[PLANE_TYPES]) { int i, j, k, l; TX_SIZE t; - for (t = TX_4X4; t <= TX_32X32; t++) - for (i = 0; i < BLOCK_TYPES; i++) - for (j = 0; j < REF_TYPES; j++) - for (k = 0; k < COEF_BANDS; k++) - for (l = 0; l < PREV_COEF_CONTEXTS; l++) { + for (t = TX_4X4; t <= TX_32X32; ++t) + for (i = 0; i < PLANE_TYPES; ++i) + for (j = 0; j < REF_TYPES; ++j) + for (k = 0; k < COEF_BANDS; ++k) + for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { vp9_prob probs[ENTROPY_NODES]; vp9_model_to_full_probs(p[t][i][j][k][l], probs); vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs, vp9_coef_tree); vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs, vp9_coef_tree); - assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] == - c[t][i][j][k][1][l][DCT_EOB_TOKEN]); + assert(c[t][i][j][k][0][l][EOB_TOKEN] == + c[t][i][j][k][1][l][EOB_TOKEN]); } } @@ -155,13 +212,13 @@ void vp9_init_me_luts() { // This is to make it easier to resolve the impact of experimental changes // to the quantizer tables. for (i = 0; i < QINDEX_RANGE; i++) { - sad_per_bit16lut[i] = - (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107); - sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742); + const double q = vp9_convert_qindex_to_q(i); + sad_per_bit16lut[i] = (int)(0.0418 * q + 2.4107); + sad_per_bit4lut[i] = (int)(0.063 * q + 2.742); } } -int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) { +int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) { const int q = vp9_dc_quant(qindex, 0); // TODO(debargha): Adjust the function below int rdmult = 88 * q * q / 25; @@ -175,12 +232,9 @@ int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) { } static int compute_rd_thresh_factor(int qindex) { - int q; // TODO(debargha): Adjust the function below - q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12); - if (q < 8) - q = 8; - return q; + const int q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12); + return MAX(q, 8); } void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { @@ -189,117 +243,88 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { } static void set_block_thresholds(VP9_COMP *cpi) { + const VP9_COMMON *const cm = &cpi->common; + const SPEED_FEATURES *const sf = &cpi->sf; int i, bsize, segment_id; - VP9_COMMON *cm = &cpi->common; for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) { - int q; - int segment_qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); - segment_qindex = clamp(segment_qindex + cm->y_dc_delta_q, 0, MAXQ); - q = compute_rd_thresh_factor(segment_qindex); + const int qindex = clamp(vp9_get_qindex(&cm->seg, segment_id, + cm->base_qindex) + cm->y_dc_delta_q, + 0, MAXQ); + const int q = compute_rd_thresh_factor(qindex); for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) { - // Threshold here seem unecessarily harsh but fine given actual - // range of values used for cpi->sf.thresh_mult[] - int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]); - - for (i = 0; i < MAX_MODES; ++i) { - if (cpi->sf.thresh_mult[i] < thresh_max) { - cpi->rd_threshes[segment_id][bsize][i] = - cpi->sf.thresh_mult[i] * q * - rd_thresh_block_size_factor[bsize] / 4; - } else { - cpi->rd_threshes[segment_id][bsize][i] = INT_MAX; - } - } + // Threshold here seems unnecessarily harsh but fine given actual + // range of values used for cpi->sf.thresh_mult[]. + const int t = q * rd_thresh_block_size_factor[bsize]; + const int thresh_max = INT_MAX / t; + + for (i = 0; i < MAX_MODES; ++i) + cpi->rd_threshes[segment_id][bsize][i] = + sf->thresh_mult[i] < thresh_max ? sf->thresh_mult[i] * t / 4 + : INT_MAX; for (i = 0; i < MAX_REFS; ++i) { - if (cpi->sf.thresh_mult_sub8x8[i] < thresh_max) { - cpi->rd_thresh_sub8x8[segment_id][bsize][i] = - cpi->sf.thresh_mult_sub8x8[i] * q * - rd_thresh_block_size_factor[bsize] / 4; - } else { - cpi->rd_thresh_sub8x8[segment_id][bsize][i] = INT_MAX; - } + cpi->rd_thresh_sub8x8[segment_id][bsize][i] = + sf->thresh_mult_sub8x8[i] < thresh_max + ? sf->thresh_mult_sub8x8[i] * t / 4 + : INT_MAX; } } } } void vp9_initialize_rd_consts(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - int qindex, i; - - vp9_clear_system_state(); // __asm emms; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + int i; - // Further tests required to see if optimum is different - // for key frames, golden frames and arf frames. - // if (cpi->common.refresh_golden_frame || - // cpi->common.refresh_alt_ref_frame) - qindex = clamp(cm->base_qindex + cm->y_dc_delta_q, 0, MAXQ); + vp9_clear_system_state(); cpi->RDDIV = RDDIV_BITS; // in bits (to multiply D by 128) - cpi->RDMULT = vp9_compute_rd_mult(cpi, qindex); + cpi->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); - cpi->mb.errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO; - cpi->mb.errorperbit += (cpi->mb.errorperbit == 0); + x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO; + x->errorperbit += (x->errorperbit == 0); - vp9_set_speed_features(cpi); - - cpi->mb.select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL && - cm->frame_type != KEY_FRAME) ? - 0 : 1; + x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL && + cm->frame_type != KEY_FRAME) ? 0 : 1; set_block_thresholds(cpi); - fill_token_costs(cpi->mb.token_costs, cm->fc.coef_probs); - - for (i = 0; i < PARTITION_CONTEXTS; i++) - vp9_cost_tokens(cpi->mb.partition_cost[i], get_partition_probs(cm, i), - vp9_partition_tree); + if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) { + fill_token_costs(x->token_costs, cm->fc.coef_probs); - /*rough estimate for costing*/ - vp9_init_mode_costs(cpi); + for (i = 0; i < PARTITION_CONTEXTS; i++) + vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i), + vp9_partition_tree); + } - if (!frame_is_intra_only(cm)) { - vp9_build_nmv_cost_table( - cpi->mb.nmvjointcost, - cm->allow_high_precision_mv ? cpi->mb.nmvcost_hp : cpi->mb.nmvcost, - &cm->fc.nmvc, - cm->allow_high_precision_mv, 1, 1); + if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 || + cm->frame_type == KEY_FRAME) { + fill_mode_costs(cpi); - for (i = 0; i < INTER_MODE_CONTEXTS; i++) { - MB_PREDICTION_MODE m; + if (!frame_is_intra_only(cm)) { + vp9_build_nmv_cost_table(x->nmvjointcost, + cm->allow_high_precision_mv ? x->nmvcost_hp + : x->nmvcost, + &cm->fc.nmvc, cm->allow_high_precision_mv); - for (m = NEARESTMV; m < MB_MODE_COUNT; m++) - cpi->mb.inter_mode_cost[i][INTER_OFFSET(m)] = - cost_token(vp9_inter_mode_tree, - cm->fc.inter_mode_probs[i], - &vp9_inter_mode_encodings[INTER_OFFSET(m)]); + for (i = 0; i < INTER_MODE_CONTEXTS; ++i) + vp9_cost_tokens((int *)x->inter_mode_cost[i], + cm->fc.inter_mode_probs[i], vp9_inter_mode_tree); } } } -static INLINE void linear_interpolate2(double x, int ntab, int inv_step, - const double *tab1, const double *tab2, - double *v1, double *v2) { - double y = x * inv_step; - int d = (int) y; - if (d >= ntab - 1) { - *v1 = tab1[ntab - 1]; - *v2 = tab2[ntab - 1]; - } else { - double a = y - d; - *v1 = tab1[d] * (1 - a) + tab1[d + 1] * a; - *v2 = tab2[d] * (1 - a) + tab2[d + 1] * a; - } -} +static const int MAX_XSQ_Q10 = 245727; -static void model_rd_norm(double x, double *R, double *D) { - static const int inv_tab_step = 8; - static const int tab_size = 120; +static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) { // NOTE: The tables below must be of the same size - // + + // The functions described below are sampled at the four most significant + // bits of x^2 + 8 / 256 + // Normalized rate // This table models the rate for a Laplacian source // source with given variance when quantized with a uniform quantizer @@ -307,22 +332,20 @@ static void model_rd_norm(double x, double *R, double *D) { // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)], // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance), // and H(x) is the binary entropy function. - static const double rate_tab[] = { - 64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194, - 2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206, - 1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708, - 0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412, - 0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236, - 0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132, - 0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073, - 0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040, - 0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022, - 0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012, - 0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006, - 0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003, - 0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002, - 0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, - 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000, + static const int rate_tab_q10[] = { + 65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, + 4553, 4389, 4255, 4142, 4044, 3958, 3881, 3811, + 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, + 3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638, + 2589, 2501, 2423, 2353, 2290, 2232, 2179, 2130, + 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, + 1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199, + 1159, 1086, 1021, 963, 911, 864, 821, 781, + 745, 680, 623, 574, 530, 490, 455, 424, + 395, 345, 304, 269, 239, 213, 190, 171, + 154, 126, 104, 87, 73, 61, 52, 44, + 38, 28, 21, 16, 12, 10, 8, 6, + 5, 3, 2, 1, 1, 1, 0, 0, }; // Normalized distortion // This table models the normalized distortion for a Laplacian source @@ -331,54 +354,74 @@ static void model_rd_norm(double x, double *R, double *D) { // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2)) // where x = qpstep / sqrt(variance) // Note the actual distortion is Dn * variance. - static const double dist_tab[] = { - 0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061, - 0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242, - 0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458, - 0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645, - 0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780, - 0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870, - 0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925, - 0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957, - 0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976, - 0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987, - 0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993, - 0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996, - 0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998, - 0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999, - 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000, + static const int dist_tab_q10[] = { + 0, 0, 1, 1, 1, 2, 2, 2, + 3, 3, 4, 5, 5, 6, 7, 7, + 8, 9, 11, 12, 13, 15, 16, 17, + 18, 21, 24, 26, 29, 31, 34, 36, + 39, 44, 49, 54, 59, 64, 69, 73, + 78, 88, 97, 106, 115, 124, 133, 142, + 151, 167, 184, 200, 215, 231, 245, 260, + 274, 301, 327, 351, 375, 397, 418, 439, + 458, 495, 528, 559, 587, 613, 637, 659, + 680, 717, 749, 777, 801, 823, 842, 859, + 874, 899, 919, 936, 949, 960, 969, 977, + 983, 994, 1001, 1006, 1010, 1013, 1015, 1017, + 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024, + }; + static const int xsq_iq_q10[] = { + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 40, 48, 56, 64, 72, 80, 88, + 96, 112, 128, 144, 160, 176, 192, 208, + 224, 256, 288, 320, 352, 384, 416, 448, + 480, 544, 608, 672, 736, 800, 864, 928, + 992, 1120, 1248, 1376, 1504, 1632, 1760, 1888, + 2016, 2272, 2528, 2784, 3040, 3296, 3552, 3808, + 4064, 4576, 5088, 5600, 6112, 6624, 7136, 7648, + 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328, + 16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688, + 32736, 36832, 40928, 45024, 49120, 53216, 57312, 61408, + 65504, 73696, 81888, 90080, 98272, 106464, 114656, 122848, + 131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728, }; /* - assert(sizeof(rate_tab) == tab_size * sizeof(rate_tab[0]); - assert(sizeof(dist_tab) == tab_size * sizeof(dist_tab[0]); - assert(sizeof(rate_tab) == sizeof(dist_tab)); + static const int tab_size = sizeof(rate_tab_q10) / sizeof(rate_tab_q10[0]); + assert(sizeof(dist_tab_q10) / sizeof(dist_tab_q10[0]) == tab_size); + assert(sizeof(xsq_iq_q10) / sizeof(xsq_iq_q10[0]) == tab_size); + assert(MAX_XSQ_Q10 + 1 == xsq_iq_q10[tab_size - 1]); */ - assert(x >= 0.0); - linear_interpolate2(x, tab_size, inv_tab_step, - rate_tab, dist_tab, R, D); + int tmp = (xsq_q10 >> 2) + 8; + int k = get_msb(tmp) - 3; + int xq = (k << 3) + ((tmp >> k) & 0x7); + const int one_q10 = 1 << 10; + const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k); + const int b_q10 = one_q10 - a_q10; + *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10; + *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10; } -static void model_rd_from_var_lapndz(int var, int n, int qstep, - int *rate, int64_t *dist) { +void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, + unsigned int qstep, int *rate, + int64_t *dist) { // This function models the rate and distortion for a Laplacian // source with given variance when quantized with a uniform quantizer // with given stepsize. The closed form expressions are in: // Hang and Chen, "Source Model for transform video coder and its // application - Part I: Fundamental Theory", IEEE Trans. Circ. // Sys. for Video Tech., April 1997. - vp9_clear_system_state(); - if (var == 0 || n == 0) { + if (var == 0) { *rate = 0; *dist = 0; } else { - double D, R; - double s2 = (double) var / n; - double x = qstep / sqrt(s2); - model_rd_norm(x, &R, &D); - *rate = (int)((n << 8) * R + 0.5); - *dist = (int)(var * D + 0.5); + int d_q10, r_q10; + const uint64_t xsq_q10_64 = + ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var; + const int xsq_q10 = xsq_q10_64 > MAX_XSQ_Q10 ? + MAX_XSQ_Q10 : (int)xsq_q10_64; + model_rd_norm(xsq_q10, &r_q10, &d_q10); + *rate = (n * r_q10 + 2) >> 2; + *dist = (var * (int64_t)d_q10 + 512) >> 10; } - vp9_clear_system_state(); } static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, @@ -387,26 +430,48 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, // Note our transform coeffs are 8 times an orthogonal transform. // Hence quantizer step is also 8 times. To get effective quantizer // we need to divide by 8 before sending to modeling function. - int i, rate_sum = 0, dist_sum = 0; + int i; + int64_t rate_sum = 0; + int64_t dist_sum = 0; + const int ref = xd->mi_8x8[0]->mbmi.ref_frame[0]; + unsigned int sse; for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblock_plane *const p = &x->plane[i]; struct macroblockd_plane *const pd = &xd->plane[i]; const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); - unsigned int sse; - int rate; - int64_t dist; + (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse); - // sse works better than var, since there is no dc prediction used - model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs], - pd->dequant[1] >> 3, &rate, &dist); - rate_sum += rate; - dist_sum += (int)dist; + if (i == 0) + x->pred_sse[ref] = sse; + + // Fast approximate the modelling function. + if (cpi->speed > 4) { + int64_t rate; + int64_t dist; + int64_t square_error = sse; + int quantizer = (pd->dequant[1] >> 3); + + if (quantizer < 120) + rate = (square_error * (280 - quantizer)) >> 8; + else + rate = 0; + dist = (square_error * quantizer) >> 8; + rate_sum += rate; + dist_sum += dist; + } else { + int rate; + int64_t dist; + vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs], + pd->dequant[1] >> 3, &rate, &dist); + rate_sum += rate; + dist_sum += dist; + } } - *out_rate_sum = rate_sum; + *out_rate_sum = (int)rate_sum; *out_dist_sum = dist_sum << 4; } @@ -417,10 +482,10 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize, int *out_skip) { int j, k; BLOCK_SIZE bs; - struct macroblock_plane *const p = &x->plane[0]; - struct macroblockd_plane *const pd = &xd->plane[0]; - const int width = 4 << num_4x4_blocks_wide_lookup[bsize]; - const int height = 4 << num_4x4_blocks_high_lookup[bsize]; + const struct macroblock_plane *const p = &x->plane[0]; + const struct macroblockd_plane *const pd = &xd->plane[0]; + const int width = 4 * num_4x4_blocks_wide_lookup[bsize]; + const int height = 4 * num_4x4_blocks_high_lookup[bsize]; int rate_sum = 0; int64_t dist_sum = 0; const int t = 4 << tx_size; @@ -447,7 +512,8 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize, &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride, &sse); // sse works better than var, since there is no dc prediction used - model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist); + vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, + &rate, &dist); rate_sum += rate; dist_sum += dist; *out_skip &= (rate < 1024); @@ -458,15 +524,15 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize, *out_dist_sum = dist_sum << 4; } -int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, +int64_t vp9_block_error_c(const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz) { int i; int64_t error = 0, sqcoeff = 0; for (i = 0; i < block_size; i++) { - int this_diff = coeff[i] - dqcoeff[i]; - error += (unsigned)this_diff * this_diff; - sqcoeff += (unsigned) coeff[i] * coeff[i]; + const int diff = coeff[i] - dqcoeff[i]; + error += diff * diff; + sqcoeff += coeff[i] * coeff[i]; } *ssz = sqcoeff; @@ -484,40 +550,38 @@ static const int16_t band_counts[TX_SIZES][8] = { { 1, 2, 3, 4, 11, 256 - 21, 0 }, { 1, 2, 3, 4, 11, 1024 - 21, 0 }, }; - static INLINE int cost_coeffs(MACROBLOCK *x, int plane, int block, ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L, TX_SIZE tx_size, - const int16_t *scan, const int16_t *nb) { + const int16_t *scan, const int16_t *nb, + int use_fast_coef_costing) { MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; - struct macroblockd_plane *pd = &xd->plane[plane]; + const struct macroblock_plane *p = &x->plane[plane]; + const struct macroblockd_plane *pd = &xd->plane[plane]; const PLANE_TYPE type = pd->plane_type; const int16_t *band_count = &band_counts[tx_size][1]; - const int eob = pd->eobs[block]; - const int16_t *const qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block); - const int ref = mbmi->ref_frame[0] != INTRA_FRAME; - unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = - x->token_costs[tx_size][type][ref]; - const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L; + const int eob = p->eobs[block]; + const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); + unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = + x->token_costs[tx_size][type][is_inter_block(mbmi)]; uint8_t *p_tok = x->token_cache; - int pt = combine_entropy_contexts(above_ec, left_ec); + int pt = combine_entropy_contexts(*A, *L); int c, cost; - // Check for consistency of tx_size with mode info - assert(type == PLANE_TYPE_Y_WITH_DC ? mbmi->tx_size == tx_size - : get_uv_tx_size(mbmi) == tx_size); + assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size + : get_uv_tx_size(mbmi) == tx_size); if (eob == 0) { // single eob token - cost = token_costs[0][0][pt][DCT_EOB_TOKEN]; + cost = token_costs[0][0][pt][EOB_TOKEN]; c = 0; } else { int band_left = *band_count++; // dc token - int v = qcoeff_ptr[0]; + int v = qcoeff[0]; int prev_t = vp9_dct_value_tokens_ptr[v].token; cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v]; p_tok[0] = vp9_pt_energy_class[prev_t]; @@ -528,11 +592,15 @@ static INLINE int cost_coeffs(MACROBLOCK *x, const int rc = scan[c]; int t; - v = qcoeff_ptr[rc]; + v = qcoeff[rc]; t = vp9_dct_value_tokens_ptr[v].token; - pt = get_coef_context(nb, p_tok, c); - cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v]; - p_tok[rc] = vp9_pt_energy_class[t]; + if (use_fast_coef_costing) { + cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v]; + } else { + pt = get_coef_context(nb, p_tok, c); + cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v]; + p_tok[rc] = vp9_pt_energy_class[t]; + } prev_t = t; if (!--band_left) { band_left = *band_count++; @@ -542,8 +610,12 @@ static INLINE int cost_coeffs(MACROBLOCK *x, // eob token if (band_left) { - pt = get_coef_context(nb, p_tok, c); - cost += (*token_costs)[0][pt][DCT_EOB_TOKEN]; + if (use_fast_coef_costing) { + cost += (*token_costs)[0][!prev_t][EOB_TOKEN]; + } else { + pt = get_coef_context(nb, p_tok, c); + cost += (*token_costs)[0][pt][EOB_TOKEN]; + } } } @@ -552,24 +624,22 @@ static INLINE int cost_coeffs(MACROBLOCK *x, return cost; } - -static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) { +static void dist_block(int plane, int block, TX_SIZE tx_size, + struct rdcost_block_args* args) { const int ss_txfrm_size = tx_size << 1; - struct rdcost_block_args* args = arg; MACROBLOCK* const x = args->x; MACROBLOCKD* const xd = &x->e_mbd; - struct macroblock_plane *const p = &x->plane[plane]; - struct macroblockd_plane *const pd = &xd->plane[plane]; + const struct macroblock_plane *const p = &x->plane[plane]; + const struct macroblockd_plane *const pd = &xd->plane[plane]; int64_t this_sse; - int shift = args->tx_size == TX_32X32 ? 0 : 2; + int shift = tx_size == TX_32X32 ? 0 : 2; int16_t *const coeff = BLOCK_OFFSET(p->coeff, block); int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse) >> shift; args->sse = this_sse >> shift; - if (x->skip_encode && - xd->mi_8x8[0]->mbmi.ref_frame[0] == INTRA_FRAME) { + if (x->skip_encode && !is_inter_block(&xd->mi_8x8[0]->mbmi)) { // TODO(jingning): tune the model to better capture the distortion. int64_t p = (pd->dequant[1] * pd->dequant[1] * (1 << ss_txfrm_size)) >> (shift + 2); @@ -579,32 +649,31 @@ static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) { } static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg) { - struct rdcost_block_args* args = arg; - + TX_SIZE tx_size, struct rdcost_block_args* args) { int x_idx, y_idx; - txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx); + txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx); args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx, - args->t_left + y_idx, args->tx_size, - args->scan, args->nb); + args->t_left + y_idx, tx_size, + args->so->scan, args->so->neighbors, + args->use_fast_coef_costing); } -static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg) { +static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, void *arg) { struct rdcost_block_args *args = arg; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; - struct encode_b_args encode_args = {x, NULL}; + MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; int64_t rd1, rd2, rd; if (args->skip) return; - if (!is_inter_block(&xd->mi_8x8[0]->mbmi)) - vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args); + if (!is_inter_block(mbmi)) + vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip); else - vp9_xform_quant(plane, block, plane_bsize, tx_size, &encode_args); + vp9_xform_quant(x, plane, block, plane_bsize, tx_size); dist_block(plane, block, tx_size, args); rate_block(plane, block, plane_bsize, tx_size, args); @@ -613,8 +682,9 @@ static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, // TODO(jingning): temporarily enabled only for luma component rd = MIN(rd1, rd2); - if (!xd->lossless && plane == 0) - x->zcoeff_blk[tx_size][block] = rd1 > rd2 || !xd->plane[plane].eobs[block]; + if (plane == 0) + x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] || + (rd1 > rd2 && !xd->lossless); args->this_rate += args->rate; args->this_dist += args->dist; @@ -627,10 +697,16 @@ static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, } } -void vp9_get_entropy_contexts(TX_SIZE tx_size, - ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16], - const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left, - int num_4x4_w, int num_4x4_h) { +void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, + const struct macroblockd_plane *pd, + ENTROPY_CONTEXT t_above[16], + ENTROPY_CONTEXT t_left[16]) { + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const ENTROPY_CONTEXT *const above = pd->above_context; + const ENTROPY_CONTEXT *const left = pd->left_context; + int i; switch (tx_size) { case TX_4X4: @@ -656,57 +732,42 @@ void vp9_get_entropy_contexts(TX_SIZE tx_size, t_left[i] = !!*(const uint64_t *)&left[i]; break; default: - assert(!"Invalid transform size."); + assert(0 && "Invalid transform size."); } } -static void init_rdcost_stack(MACROBLOCK *x, TX_SIZE tx_size, - const int num_4x4_w, const int num_4x4_h, - const int64_t ref_rdcost, - struct rdcost_block_args *arg) { - vpx_memset(arg, 0, sizeof(struct rdcost_block_args)); - arg->x = x; - arg->tx_size = tx_size; - arg->bw = num_4x4_w; - arg->bh = num_4x4_h; - arg->best_rd = ref_rdcost; -} - static void txfm_rd_in_plane(MACROBLOCK *x, - struct rdcost_block_args *rd_stack, int *rate, int64_t *distortion, int *skippable, int64_t *sse, int64_t ref_best_rd, int plane, - BLOCK_SIZE bsize, TX_SIZE tx_size) { + BLOCK_SIZE bsize, TX_SIZE tx_size, + int use_fast_coef_casting) { MACROBLOCKD *const xd = &x->e_mbd; - struct macroblockd_plane *const pd = &xd->plane[plane]; - const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); - const int num_4x4_w = num_4x4_blocks_wide_lookup[bs]; - const int num_4x4_h = num_4x4_blocks_high_lookup[bs]; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + struct rdcost_block_args args = { 0 }; + args.x = x; + args.best_rd = ref_best_rd; + args.use_fast_coef_costing = use_fast_coef_casting; - init_rdcost_stack(x, tx_size, num_4x4_w, num_4x4_h, - ref_best_rd, rd_stack); if (plane == 0) xd->mi_8x8[0]->mbmi.tx_size = tx_size; - vp9_get_entropy_contexts(tx_size, rd_stack->t_above, rd_stack->t_left, - pd->above_context, pd->left_context, - num_4x4_w, num_4x4_h); + vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); - get_scan(xd, tx_size, pd->plane_type, 0, &rd_stack->scan, &rd_stack->nb); + args.so = get_scan(xd, tx_size, pd->plane_type, 0); - foreach_transformed_block_in_plane(xd, bsize, plane, - block_yrd_txfm, rd_stack); - if (rd_stack->skip) { + vp9_foreach_transformed_block_in_plane(xd, bsize, plane, + block_rd_txfm, &args); + if (args.skip) { *rate = INT_MAX; *distortion = INT64_MAX; *sse = INT64_MAX; *skippable = 0; } else { - *distortion = rd_stack->this_dist; - *rate = rd_stack->this_rate; - *sse = rd_stack->this_sse; - *skippable = vp9_is_skippable_in_plane(xd, bsize, plane); + *distortion = args.this_dist; + *rate = args.this_rate; + *sse = args.this_sse; + *skippable = vp9_is_skippable_in_plane(x, bsize, plane); } } @@ -723,9 +784,9 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x, mbmi->tx_size = MIN(max_tx_size, largest_tx_size); - txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip, + txfm_rd_in_plane(x, rate, distortion, skip, &sse[mbmi->tx_size], ref_best_rd, 0, bs, - mbmi->tx_size); + mbmi->tx_size, cpi->sf.use_fast_coef_costing); cpi->tx_stepdown_count[0]++; } @@ -739,63 +800,49 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; - vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd); - int64_t rd[TX_SIZES][2]; + vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); + int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX}, + {INT64_MAX, INT64_MAX}, + {INT64_MAX, INT64_MAX}, + {INT64_MAX, INT64_MAX}}; int n, m; int s0, s1; + const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; + int64_t best_rd = INT64_MAX; + TX_SIZE best_tx = TX_4X4; const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs); - - for (n = TX_4X4; n <= max_tx_size; n++) { - r[n][1] = r[n][0]; - if (r[n][0] == INT_MAX) - continue; - for (m = 0; m <= n - (n == max_tx_size); m++) { - if (m == n) - r[n][1] += vp9_cost_zero(tx_probs[m]); - else - r[n][1] += vp9_cost_one(tx_probs[m]); - } - } - assert(skip_prob > 0); s0 = vp9_cost_bit(skip_prob, 0); s1 = vp9_cost_bit(skip_prob, 1); for (n = TX_4X4; n <= max_tx_size; n++) { + r[n][1] = r[n][0]; + if (r[n][0] < INT_MAX) { + for (m = 0; m <= n - (n == max_tx_size); m++) { + if (m == n) + r[n][1] += vp9_cost_zero(tx_probs[m]); + else + r[n][1] += vp9_cost_one(tx_probs[m]); + } + } if (d[n] == INT64_MAX) { rd[n][0] = rd[n][1] = INT64_MAX; - continue; - } - if (s[n]) { + } else if (s[n]) { rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]); } else { rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]); rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); } - } - if (max_tx_size == TX_32X32 && - (cm->tx_mode == ALLOW_32X32 || - (cm->tx_mode == TX_MODE_SELECT && - rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && - rd[TX_32X32][1] < rd[TX_4X4][1]))) { - mbmi->tx_size = TX_32X32; - } else if (max_tx_size >= TX_16X16 && - (cm->tx_mode == ALLOW_16X16 || - cm->tx_mode == ALLOW_32X32 || - (cm->tx_mode == TX_MODE_SELECT && - rd[TX_16X16][1] < rd[TX_8X8][1] && - rd[TX_16X16][1] < rd[TX_4X4][1]))) { - mbmi->tx_size = TX_16X16; - } else if (cm->tx_mode == ALLOW_8X8 || - cm->tx_mode == ALLOW_16X16 || - cm->tx_mode == ALLOW_32X32 || - (cm->tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) { - mbmi->tx_size = TX_8X8; - } else { - mbmi->tx_size = TX_4X4; + if (rd[n][1] < best_rd) { + best_tx = n; + best_rd = rd[n][1]; + } } + mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ? + best_tx : MIN(max_tx_size, max_mode_tx_size); + *distortion = d[mbmi->tx_size]; *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT]; @@ -805,33 +852,27 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, tx_cache[ALLOW_8X8] = rd[TX_8X8][0]; tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0]; tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0]; - if (max_tx_size == TX_32X32 && - rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && - rd[TX_32X32][1] < rd[TX_4X4][1]) - tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1]; - else if (max_tx_size >= TX_16X16 && - rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) - tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1]; - else - tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ? - rd[TX_4X4][1] : rd[TX_8X8][1]; - if (max_tx_size == TX_32X32 && - rd[TX_32X32][1] < rd[TX_16X16][1] && - rd[TX_32X32][1] < rd[TX_8X8][1] && - rd[TX_32X32][1] < rd[TX_4X4][1]) { + if (max_tx_size == TX_32X32 && best_tx == TX_32X32) { + tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1]; cpi->tx_stepdown_count[0]++; - } else if (max_tx_size >= TX_16X16 && - rd[TX_16X16][1] < rd[TX_8X8][1] && - rd[TX_16X16][1] < rd[TX_4X4][1]) { + } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) { + tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1]; cpi->tx_stepdown_count[max_tx_size - TX_16X16]++; } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) { + tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1]; cpi->tx_stepdown_count[max_tx_size - TX_8X8]++; } else { + tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1]; cpi->tx_stepdown_count[max_tx_size - TX_4X4]++; } } +static int64_t scaled_rd_cost(int rdmult, int rddiv, + int rate, int64_t dist, double scale) { + return (int64_t) (RDCOST(rdmult, rddiv, rate, dist) * scale); +} + static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, int (*r)[2], int *rate, int64_t *d, int64_t *distortion, @@ -842,19 +883,25 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; - vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd); - int64_t rd[TX_SIZES][2]; + vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); + int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX}, + {INT64_MAX, INT64_MAX}, + {INT64_MAX, INT64_MAX}, + {INT64_MAX, INT64_MAX}}; int n, m; int s0, s1; double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00}; - // double scale_r[TX_SIZES] = {2.82, 2.00, 1.41, 1.00}; + const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; + int64_t best_rd = INT64_MAX; + TX_SIZE best_tx = TX_4X4; const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs); - - // for (n = TX_4X4; n <= max_txfm_size; n++) - // r[n][0] = (r[n][0] * scale_r[n]); + assert(skip_prob > 0); + s0 = vp9_cost_bit(skip_prob, 0); + s1 = vp9_cost_bit(skip_prob, 1); for (n = TX_4X4; n <= max_tx_size; n++) { + double scale = scale_rd[n]; r[n][1] = r[n][0]; for (m = 0; m <= n - (n == max_tx_size); m++) { if (m == n) @@ -862,62 +909,33 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, else r[n][1] += vp9_cost_one(tx_probs[m]); } - } - - assert(skip_prob > 0); - s0 = vp9_cost_bit(skip_prob, 0); - s1 = vp9_cost_bit(skip_prob, 1); - - for (n = TX_4X4; n <= max_tx_size; n++) { if (s[n]) { - rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]); + rd[n][0] = rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, s1, d[n], + scale); } else { - rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]); - rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); + rd[n][0] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][0] + s0, d[n], + scale); + rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][1] + s0, d[n], + scale); + } + if (rd[n][1] < best_rd) { + best_rd = rd[n][1]; + best_tx = n; } - } - for (n = TX_4X4; n <= max_tx_size; n++) { - rd[n][0] = (int64_t)(scale_rd[n] * rd[n][0]); - rd[n][1] = (int64_t)(scale_rd[n] * rd[n][1]); } - if (max_tx_size == TX_32X32 && - (cm->tx_mode == ALLOW_32X32 || - (cm->tx_mode == TX_MODE_SELECT && - rd[TX_32X32][1] <= rd[TX_16X16][1] && - rd[TX_32X32][1] <= rd[TX_8X8][1] && - rd[TX_32X32][1] <= rd[TX_4X4][1]))) { - mbmi->tx_size = TX_32X32; - } else if (max_tx_size >= TX_16X16 && - (cm->tx_mode == ALLOW_16X16 || - cm->tx_mode == ALLOW_32X32 || - (cm->tx_mode == TX_MODE_SELECT && - rd[TX_16X16][1] <= rd[TX_8X8][1] && - rd[TX_16X16][1] <= rd[TX_4X4][1]))) { - mbmi->tx_size = TX_16X16; - } else if (cm->tx_mode == ALLOW_8X8 || - cm->tx_mode == ALLOW_16X16 || - cm->tx_mode == ALLOW_32X32 || - (cm->tx_mode == TX_MODE_SELECT && - rd[TX_8X8][1] <= rd[TX_4X4][1])) { - mbmi->tx_size = TX_8X8; - } else { - mbmi->tx_size = TX_4X4; - } + mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ? + best_tx : MIN(max_tx_size, max_mode_tx_size); // Actually encode using the chosen mode if a model was used, but do not // update the r, d costs - txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip, - &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size); + txfm_rd_in_plane(x, rate, distortion, skip, + &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size, + cpi->sf.use_fast_coef_costing); - if (max_tx_size == TX_32X32 && - rd[TX_32X32][1] <= rd[TX_16X16][1] && - rd[TX_32X32][1] <= rd[TX_8X8][1] && - rd[TX_32X32][1] <= rd[TX_4X4][1]) { + if (max_tx_size == TX_32X32 && best_tx == TX_32X32) { cpi->tx_stepdown_count[0]++; - } else if (max_tx_size >= TX_16X16 && - rd[TX_16X16][1] <= rd[TX_8X8][1] && - rd[TX_16X16][1] <= rd[TX_4X4][1]) { + } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) { cpi->tx_stepdown_count[max_tx_size - TX_16X16]++; } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) { cpi->tx_stepdown_count[max_tx_size - TX_8X8]++; @@ -926,25 +944,23 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, } } -static void super_block_yrd(VP9_COMP *cpi, - MACROBLOCK *x, int *rate, int64_t *distortion, - int *skip, int64_t *psse, BLOCK_SIZE bs, - int64_t txfm_cache[TX_MODES], - int64_t ref_best_rd) { +static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, + int64_t *distortion, int *skip, + int64_t *psse, BLOCK_SIZE bs, + int64_t txfm_cache[TX_MODES], + int64_t ref_best_rd) { int r[TX_SIZES][2], s[TX_SIZES]; int64_t d[TX_SIZES], sse[TX_SIZES]; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; - struct rdcost_block_args *rdcost_stack = &cpi->rdcost_stack; - const int b_inter_mode = is_inter_block(mbmi); + const TX_SIZE max_tx_size = max_txsize_lookup[bs]; + TX_SIZE tx_size; assert(bs == mbmi->sb_type); - if (b_inter_mode) - vp9_subtract_sby(x, bs); - if (cpi->sf.tx_size_search_method == USE_LARGESTALL || - (cpi->sf.tx_size_search_method != USE_FULL_RD && - !b_inter_mode)) { + vp9_subtract_plane(x, bs, 0); + + if (cpi->sf.tx_size_search_method == USE_LARGESTALL) { vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t)); choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse, ref_best_rd, bs); @@ -953,36 +969,18 @@ static void super_block_yrd(VP9_COMP *cpi, return; } - if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER && - b_inter_mode) { - if (bs >= BLOCK_32X32) - model_rd_for_sb_y_tx(cpi, bs, TX_32X32, x, xd, - &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]); - if (bs >= BLOCK_16X16) - model_rd_for_sb_y_tx(cpi, bs, TX_16X16, x, xd, - &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]); - - model_rd_for_sb_y_tx(cpi, bs, TX_8X8, x, xd, - &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]); - - model_rd_for_sb_y_tx(cpi, bs, TX_4X4, x, xd, - &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]); - + if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER) { + for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) + model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd, + &r[tx_size][0], &d[tx_size], &s[tx_size]); choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s, skip, sse, ref_best_rd, bs); } else { - if (bs >= BLOCK_32X32) - txfm_rd_in_plane(x, rdcost_stack, &r[TX_32X32][0], &d[TX_32X32], - &s[TX_32X32], &sse[TX_32X32], - ref_best_rd, 0, bs, TX_32X32); - if (bs >= BLOCK_16X16) - txfm_rd_in_plane(x, rdcost_stack, &r[TX_16X16][0], &d[TX_16X16], - &s[TX_16X16], &sse[TX_16X16], - ref_best_rd, 0, bs, TX_16X16); - txfm_rd_in_plane(x, rdcost_stack, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], - &sse[TX_8X8], ref_best_rd, 0, bs, TX_8X8); - txfm_rd_in_plane(x, rdcost_stack, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], - &sse[TX_4X4], ref_best_rd, 0, bs, TX_4X4); + for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) + txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size], + &s[tx_size], &sse[tx_size], + ref_best_rd, 0, bs, tx_size, + cpi->sf.use_fast_coef_costing); choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache, bs); } @@ -990,6 +988,37 @@ static void super_block_yrd(VP9_COMP *cpi, *psse = sse[mbmi->tx_size]; } +static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, + int64_t *distortion, int *skip, + int64_t *psse, BLOCK_SIZE bs, + int64_t txfm_cache[TX_MODES], + int64_t ref_best_rd) { + int64_t sse[TX_SIZES]; + MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + + assert(bs == mbmi->sb_type); + if (cpi->sf.tx_size_search_method != USE_FULL_RD) { + vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t)); + choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse, + ref_best_rd, bs); + } else { + int r[TX_SIZES][2], s[TX_SIZES]; + int64_t d[TX_SIZES]; + TX_SIZE tx_size; + for (tx_size = TX_4X4; tx_size <= max_txsize_lookup[bs]; ++tx_size) + txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size], + &s[tx_size], &sse[tx_size], + ref_best_rd, 0, bs, tx_size, + cpi->sf.use_fast_coef_costing); + choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, + skip, txfm_cache, bs); + } + if (psse) + *psse = sse[mbmi->tx_size]; +} + + static int conditional_skipintra(MB_PREDICTION_MODE mode, MB_PREDICTION_MODE best_intra_mode) { if (mode == D117_PRED && @@ -1013,26 +1042,23 @@ static int conditional_skipintra(MB_PREDICTION_MODE mode, static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, MB_PREDICTION_MODE *best_mode, - int *bmode_costs, + const int *bmode_costs, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion, BLOCK_SIZE bsize, int64_t rd_thresh) { MB_PREDICTION_MODE mode; - MACROBLOCKD *xd = &x->e_mbd; + MACROBLOCKD *const xd = &x->e_mbd; int64_t best_rd = rd_thresh; - int rate = 0; - int64_t distortion; + struct macroblock_plane *p = &x->plane[0]; struct macroblockd_plane *pd = &xd->plane[0]; const int src_stride = p->src.stride; const int dst_stride = pd->dst.stride; - uint8_t *src_init = raster_block_offset_uint8(BLOCK_8X8, ib, - p->src.buf, src_stride); - uint8_t *dst_init = raster_block_offset_uint8(BLOCK_8X8, ib, - pd->dst.buf, dst_stride); - int16_t *src_diff, *coeff; - + const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib, + src_stride)]; + uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib, + dst_stride)]; ENTROPY_CONTEXT ta[2], tempa[2]; ENTROPY_CONTEXT tl[2], templ[2]; @@ -1050,6 +1076,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, for (mode = DC_PRED; mode <= TM_PRED; ++mode) { int64_t this_rd; int ratey = 0; + int64_t distortion = 0; + int rate = bmode_costs[mode]; if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue; @@ -1061,56 +1089,52 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, continue; } - rate = bmode_costs[mode]; - distortion = 0; - vpx_memcpy(tempa, ta, sizeof(ta)); vpx_memcpy(templ, tl, sizeof(tl)); for (idy = 0; idy < num_4x4_blocks_high; ++idy) { for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { - int64_t ssz; - const int16_t *scan; - const int16_t *nb; - uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride; - uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride; const int block = ib + idy * 2 + idx; - TX_TYPE tx_type; + const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride]; + uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride]; + int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block, + p->src_diff); + int16_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block); xd->mi_8x8[0]->bmi[block].as_mode = mode; - src_diff = raster_block_offset_int16(BLOCK_8X8, block, p->src_diff); - coeff = BLOCK_OFFSET(x->plane[0].coeff, block); vp9_predict_intra_block(xd, block, 1, TX_4X4, mode, x->skip_encode ? src : dst, x->skip_encode ? src_stride : dst_stride, - dst, dst_stride); - vp9_subtract_block(4, 4, src_diff, 8, - src, src_stride, - dst, dst_stride); - - tx_type = get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block); - get_scan_nb_4x4(tx_type, &scan, &nb); - - if (tx_type != DCT_DCT) - vp9_short_fht4x4(src_diff, coeff, 8, tx_type); - else - x->fwd_txm4x4(src_diff, coeff, 8); - - vp9_regular_quantize_b_4x4(x, 4, block, scan, get_iscan_4x4(tx_type)); - - ratey += cost_coeffs(x, 0, block, - tempa + idx, templ + idy, TX_4X4, scan, nb); - distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block), - 16, &ssz) >> 2; - if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) - goto next; - - if (tx_type != DCT_DCT) - vp9_iht4x4_16_add(BLOCK_OFFSET(pd->dqcoeff, block), - dst, pd->dst.stride, tx_type); - else - xd->itxm_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, pd->dst.stride, - 16); + dst, dst_stride, idx, idy, 0); + vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride); + + if (xd->lossless) { + const scan_order *so = &vp9_default_scan_orders[TX_4X4]; + vp9_fwht4x4(src_diff, coeff, 8); + vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); + ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, + so->scan, so->neighbors, + cpi->sf.use_fast_coef_costing); + if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) + goto next; + vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride, + p->eobs[block]); + } else { + int64_t unused; + const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block); + const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type]; + vp9_fht4x4(src_diff, coeff, 8, tx_type); + vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); + ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, + so->scan, so->neighbors, + cpi->sf.use_fast_coef_costing); + distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block), + 16, &unused) >> 2; + if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) + goto next; + vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block), + dst, dst_stride, p->eobs[block]); + } } } @@ -1143,14 +1167,12 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, return best_rd; } -static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi, - MACROBLOCK * const mb, - int * const rate, - int * const rate_y, - int64_t * const distortion, +static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb, + int *rate, int *rate_y, + int64_t *distortion, int64_t best_rd) { int i, j; - MACROBLOCKD *const xd = &mb->e_mbd; + const MACROBLOCKD *const xd = &mb->e_mbd; MODE_INFO *const mic = xd->mi_8x8[0]; const MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride]; const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL; @@ -1163,13 +1185,11 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi, int tot_rate_y = 0; int64_t total_rd = 0; ENTROPY_CONTEXT t_above[4], t_left[4]; - int *bmode_costs; + const int *bmode_costs = mb->mbmode_cost; vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left)); - bmode_costs = mb->mbmode_cost; - // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block. for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { @@ -1178,8 +1198,8 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi, int64_t d = INT64_MAX, this_rd = INT64_MAX; i = idy * 2 + idx; if (cpi->common.frame_type == KEY_FRAME) { - const MB_PREDICTION_MODE A = above_block_mode(mic, above_mi, i); - const MB_PREDICTION_MODE L = left_block_mode(mic, left_mi, i); + const MB_PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i); + const MB_PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i); bmode_costs = mb->y_mode_costs[A][L]; } @@ -1244,15 +1264,15 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, continue; if (cpi->common.frame_type == KEY_FRAME) { - const MB_PREDICTION_MODE A = above_block_mode(mic, above_mi, 0); - const MB_PREDICTION_MODE L = left_block_mode(mic, left_mi, 0); + const MB_PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0); + const MB_PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0); bmode_costs = x->y_mode_costs[A][L]; } mic->mbmi.mode = mode; - super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL, - bsize, local_tx_cache, best_rd); + intra_super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, + &s, NULL, bsize, local_tx_cache, best_rd); if (this_rate_tokenonly == INT_MAX) continue; @@ -1287,7 +1307,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, return best_rd; } -static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x, +static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skippable, int64_t *sse, BLOCK_SIZE bsize, int64_t ref_best_rd) { @@ -1301,8 +1321,11 @@ static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x, if (ref_best_rd < 0) goto term; - if (is_inter_block(mbmi)) - vp9_subtract_sbuv(x, bsize); + if (is_inter_block(mbmi)) { + int plane; + for (plane = 1; plane < MAX_MB_PLANE; ++plane) + vp9_subtract_plane(x, bsize, plane); + } *rate = 0; *distortion = 0; @@ -1310,8 +1333,9 @@ static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x, *skippable = 1; for (plane = 1; plane < MAX_MB_PLANE; ++plane) { - txfm_rd_in_plane(x, &cpi->rdcost_stack, &pnrate, &pndist, &pnskip, &pnsse, - ref_best_rd, plane, bsize, uv_txfm_size); + txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse, + ref_best_rd, plane, bsize, uv_txfm_size, + cpi->sf.use_fast_coef_costing); if (pnrate == INT_MAX) goto term; *rate += pnrate; @@ -1333,23 +1357,19 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, - BLOCK_SIZE bsize) { + BLOCK_SIZE bsize, TX_SIZE max_tx_size) { + MACROBLOCKD *xd = &x->e_mbd; MB_PREDICTION_MODE mode; MB_PREDICTION_MODE mode_selected = DC_PRED; int64_t best_rd = INT64_MAX, this_rd; int this_rate_tokenonly, this_rate, s; int64_t this_distortion, this_sse; - // int mode_mask = (bsize <= BLOCK_8X8) - // ? ALL_INTRA_MODES : cpi->sf.intra_uv_mode_mask; - - for (mode = DC_PRED; mode <= TM_PRED; mode ++) { - // if (!(mode_mask & (1 << mode))) - if (!(cpi->sf.intra_uv_mode_mask[max_uv_txsize_lookup[bsize]] - & (1 << mode))) + for (mode = DC_PRED; mode <= TM_PRED; ++mode) { + if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue; - x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode; + xd->mi_8x8[0]->mbmi.uv_mode = mode; super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, &this_sse, bsize, best_rd); @@ -1369,12 +1389,12 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, if (!x->select_txfm_size) { int i; struct macroblock_plane *const p = x->plane; - struct macroblockd_plane *const pd = x->e_mbd.plane; + struct macroblockd_plane *const pd = xd->plane; for (i = 1; i < MAX_MB_PLANE; ++i) { p[i].coeff = ctx->coeff_pbuf[i][2]; - pd[i].qcoeff = ctx->qcoeff_pbuf[i][2]; + p[i].qcoeff = ctx->qcoeff_pbuf[i][2]; pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2]; - pd[i].eobs = ctx->eobs_pbuf[i][2]; + p[i].eobs = ctx->eobs_pbuf[i][2]; ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0]; ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0]; @@ -1382,39 +1402,35 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, ctx->eobs_pbuf[i][2] = ctx->eobs_pbuf[i][0]; ctx->coeff_pbuf[i][0] = p[i].coeff; - ctx->qcoeff_pbuf[i][0] = pd[i].qcoeff; + ctx->qcoeff_pbuf[i][0] = p[i].qcoeff; ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff; - ctx->eobs_pbuf[i][0] = pd[i].eobs; + ctx->eobs_pbuf[i][0] = p[i].eobs; } } } } - x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode_selected; - + xd->mi_8x8[0]->mbmi.uv_mode = mode_selected; return best_rd; } -static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x, +static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, BLOCK_SIZE bsize) { - int64_t this_rd; - int64_t this_sse; + const VP9_COMMON *cm = &cpi->common; + int64_t unused; x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED; super_block_uvrd(cpi, x, rate_tokenonly, distortion, - skippable, &this_sse, bsize, INT64_MAX); - *rate = *rate_tokenonly + - x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED]; - this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion); - - return this_rd; + skippable, &unused, bsize, INT64_MAX); + *rate = *rate_tokenonly + x->intra_uv_mode_cost[cm->frame_type][DC_PRED]; + return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, - BLOCK_SIZE bsize, int *rate_uv, - int *rate_uv_tokenonly, + BLOCK_SIZE bsize, TX_SIZE max_tx_size, + int *rate_uv, int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv, MB_PREDICTION_MODE *mode_uv) { MACROBLOCK *const x = &cpi->mb; @@ -1422,14 +1438,14 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, // Use an estimated rd for uv_intra based on DC_PRED if the // appropriate speed flag is set. if (cpi->sf.use_uv_intra_rd_estimate) { - rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, - bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize); + rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, + skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize); // Else do a proper rd search for each possible transform size that may // be considered in the main rd loop. } else { rd_pick_intra_sbuv_mode(cpi, x, ctx, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, - bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize); + bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size); } *mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode; } @@ -1437,8 +1453,7 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode, int mode_context) { MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; - const int segment_id = xd->mi_8x8[0]->mbmi.segment_id; + const int segment_id = x->e_mbd.mi_8x8[0]->mbmi.segment_id; // Don't account for mode here if segment skip is enabled. if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) { @@ -1449,11 +1464,6 @@ static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode, } } -void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) { - x->e_mbd.mi_8x8[0]->mbmi.mode = mb; - x->e_mbd.mi_8x8[0]->mbmi.mv[0].as_int = mv->as_int; -} - static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv, @@ -1461,79 +1471,66 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int_mv single_newmv[MAX_REF_FRAMES], int *rate_mv); -static int labels2mode(MACROBLOCK *x, int i, - MB_PREDICTION_MODE this_mode, - int_mv *this_mv, int_mv *this_second_mv, +static int labels2mode(VP9_COMP *cpi, MACROBLOCKD *xd, int i, + MB_PREDICTION_MODE mode, + int_mv this_mv[2], int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int_mv seg_mvs[MAX_REF_FRAMES], - int_mv *best_ref_mv, - int_mv *second_best_ref_mv, - int *mvjcost, int *mvcost[2], VP9_COMP *cpi) { - MACROBLOCKD *const xd = &x->e_mbd; + int_mv *best_ref_mv[2], + const int *mvjcost, int *mvcost[2]) { MODE_INFO *const mic = xd->mi_8x8[0]; - MB_MODE_INFO *mbmi = &mic->mbmi; - int cost = 0, thismvcost = 0; + const MB_MODE_INFO *const mbmi = &mic->mbmi; + int thismvcost = 0; int idx, idy; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; - const int has_second_rf = has_second_ref(mbmi); - - /* We have to be careful retrieving previously-encoded motion vectors. - Ones from this macroblock have to be pulled from the BLOCKD array - as they have not yet made it to the bmi array in our MB_MODE_INFO. */ - MB_PREDICTION_MODE m; + const int is_compound = has_second_ref(mbmi); // the only time we should do costing for new motion vector or mode // is when we are on a new label (jbb May 08, 2007) - switch (m = this_mode) { + switch (mode) { case NEWMV: - this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int; - thismvcost = vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv, + this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int; + thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv, mvjcost, mvcost, MV_COST_WEIGHT_SUB); - if (has_second_rf) { - this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int; - thismvcost += vp9_mv_bit_cost(&this_second_mv->as_mv, - &second_best_ref_mv->as_mv, + if (is_compound) { + this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int; + thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv, mvjcost, mvcost, MV_COST_WEIGHT_SUB); } break; case NEARESTMV: - this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int; - if (has_second_rf) - this_second_mv->as_int = - frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int; + this_mv[0].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int; + if (is_compound) + this_mv[1].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int; break; case NEARMV: - this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int; - if (has_second_rf) - this_second_mv->as_int = - frame_mv[NEARMV][mbmi->ref_frame[1]].as_int; + this_mv[0].as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int; + if (is_compound) + this_mv[1].as_int = frame_mv[NEARMV][mbmi->ref_frame[1]].as_int; break; case ZEROMV: - this_mv->as_int = 0; - if (has_second_rf) - this_second_mv->as_int = 0; + this_mv[0].as_int = 0; + if (is_compound) + this_mv[1].as_int = 0; break; default: break; } - cost = cost_mv_ref(cpi, this_mode, - mbmi->mode_context[mbmi->ref_frame[0]]); - - mic->bmi[i].as_mv[0].as_int = this_mv->as_int; - if (has_second_rf) - mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int; + mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int; + if (is_compound) + mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int; - mic->bmi[i].as_mode = m; + mic->bmi[i].as_mode = mode; for (idy = 0; idy < num_4x4_blocks_high; ++idy) for (idx = 0; idx < num_4x4_blocks_wide; ++idx) vpx_memcpy(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i])); - cost += thismvcost; - return cost; + return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) + + thismvcost; } static int64_t encode_inter_mb_segment(VP9_COMP *cpi, @@ -1543,32 +1540,36 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, int *labelyrate, int64_t *distortion, int64_t *sse, ENTROPY_CONTEXT *ta, - ENTROPY_CONTEXT *tl) { + ENTROPY_CONTEXT *tl, + int mi_row, int mi_col) { int k; MACROBLOCKD *xd = &x->e_mbd; struct macroblockd_plane *const pd = &xd->plane[0]; struct macroblock_plane *const p = &x->plane[0]; MODE_INFO *const mi = xd->mi_8x8[0]; - const BLOCK_SIZE bsize = mi->mbmi.sb_type; - const int width = plane_block_width(bsize, pd); - const int height = plane_block_height(bsize, pd); + const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd); + const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; + const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize]; int idx, idy; - uint8_t *const src = raster_block_offset_uint8(BLOCK_8X8, i, - p->src.buf, p->src.stride); - uint8_t *const dst = raster_block_offset_uint8(BLOCK_8X8, i, - pd->dst.buf, pd->dst.stride); + const uint8_t *const src = &p->src.buf[raster_block_offset(BLOCK_8X8, i, + p->src.stride)]; + uint8_t *const dst = &pd->dst.buf[raster_block_offset(BLOCK_8X8, i, + pd->dst.stride)]; int64_t thisdistortion = 0, thissse = 0; int thisrate = 0, ref; + const scan_order *so = &vp9_default_scan_orders[TX_4X4]; const int is_compound = has_second_ref(&mi->mbmi); for (ref = 0; ref < 1 + is_compound; ++ref) { - const uint8_t *pre = raster_block_offset_uint8(BLOCK_8X8, i, - pd->pre[ref].buf, pd->pre[ref].stride); + const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i, + pd->pre[ref].stride)]; vp9_build_inter_predictor(pre, pd->pre[ref].stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, - &xd->scale_factor[ref], - width, height, ref, &xd->subpix, MV_PRECISION_Q3); + &xd->block_refs[ref]->sf, width, height, ref, + xd->interp_kernel, MV_PRECISION_Q3, + mi_col * MI_SIZE + 4 * (i % 2), + mi_row * MI_SIZE + 4 * (i / 2)); } vp9_subtract_block(height, width, @@ -1586,16 +1587,13 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, coeff = BLOCK_OFFSET(p->coeff, k); x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff), coeff, 8); - vp9_regular_quantize_b_4x4(x, 4, k, get_scan_4x4(DCT_DCT), - get_iscan_4x4(DCT_DCT)); + vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan); thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz); thissse += ssz; - thisrate += cost_coeffs(x, 0, k, - ta + (k & 1), - tl + (k >> 1), TX_4X4, - vp9_default_scan_4x4, - vp9_default_scan_4x4_neighbors); + thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4, + so->scan, so->neighbors, + cpi->sf.use_fast_coef_costing); rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2); rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2); rd = MIN(rd1, rd2); @@ -1624,7 +1622,7 @@ typedef struct { } SEG_RDSTAT; typedef struct { - int_mv *ref_mv, *second_ref_mv; + int_mv *ref_mv[2]; int_mv mvp; int64_t segment_rd; @@ -1637,13 +1635,11 @@ typedef struct { int mvthresh; } BEST_SEG_INFO; -static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) { - int r = 0; - r |= (mv->as_mv.row >> 3) < x->mv_row_min; - r |= (mv->as_mv.row >> 3) > x->mv_row_max; - r |= (mv->as_mv.col >> 3) < x->mv_col_min; - r |= (mv->as_mv.col >> 3) > x->mv_col_max; - return r; +static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) { + return (mv->row >> 3) < x->mv_row_min || + (mv->row >> 3) > x->mv_row_max || + (mv->col >> 3) < x->mv_col_min || + (mv->col >> 3) > x->mv_col_max; } static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { @@ -1651,14 +1647,13 @@ static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &x->e_mbd.plane[0]; - p->src.buf = raster_block_offset_uint8(BLOCK_8X8, i, p->src.buf, - p->src.stride); + p->src.buf = &p->src.buf[raster_block_offset(BLOCK_8X8, i, p->src.stride)]; assert(((intptr_t)pd->pre[0].buf & 0x7) == 0); - pd->pre[0].buf = raster_block_offset_uint8(BLOCK_8X8, i, pd->pre[0].buf, - pd->pre[0].stride); + pd->pre[0].buf = &pd->pre[0].buf[raster_block_offset(BLOCK_8X8, i, + pd->pre[0].stride)]; if (has_second_ref(mbmi)) - pd->pre[1].buf = raster_block_offset_uint8(BLOCK_8X8, i, pd->pre[1].buf, - pd->pre[1].stride); + pd->pre[1].buf = &pd->pre[1].buf[raster_block_offset(BLOCK_8X8, i, + pd->pre[1].stride)]; } static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src, @@ -1670,17 +1665,24 @@ static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src, x->e_mbd.plane[0].pre[1] = orig_pre[1]; } +static INLINE int mv_has_subpel(const MV *mv) { + return (mv->row & 0x0F) || (mv->col & 0x0F); +} + static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, BEST_SEG_INFO *bsi_buf, int filter_idx, int_mv seg_mvs[4][MAX_REF_FRAMES], int mi_row, int mi_col) { - int i, br = 0, idx, idy; + int k, br = 0, idx, idy; int64_t bd = 0, block_sse = 0; MB_PREDICTION_MODE this_mode; - MODE_INFO *mi = x->e_mbd.mi_8x8[0]; + MACROBLOCKD *xd = &x->e_mbd; + VP9_COMMON *cm = &cpi->common; + MODE_INFO *mi = xd->mi_8x8[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; - struct macroblockd_plane *const pd = &x->e_mbd.plane[0]; + struct macroblock_plane *const p = &x->plane[0]; + struct macroblockd_plane *const pd = &xd->plane[0]; const int label_count = 4; int64_t this_segment_rd = 0; int label_mv_thresh; @@ -1688,18 +1690,17 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize = mbmi->sb_type; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; - vp9_variance_fn_ptr_t *v_fn_ptr; + vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize]; ENTROPY_CONTEXT t_above[2], t_left[2]; BEST_SEG_INFO *bsi = bsi_buf + filter_idx; int mode_idx; int subpelmv = 1, have_ref = 0; const int has_second_rf = has_second_ref(mbmi); + const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize]; vpx_memcpy(t_above, pd->above_context, sizeof(t_above)); vpx_memcpy(t_left, pd->left_context, sizeof(t_left)); - v_fn_ptr = &cpi->fn_ptr[bsize]; - // 64 makes this threshold really big effectively // making it so that we very rarely check mvs on // segments. setting this to 1 would make mv thresh @@ -1711,24 +1712,21 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { // TODO(jingning,rbultje): rewrite the rate-distortion optimization // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop - int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT]; + int_mv mode_mv[MB_MODE_COUNT][2]; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; MB_PREDICTION_MODE mode_selected = ZEROMV; int64_t best_rd = INT64_MAX; - i = idy * 2 + idx; - - frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0; - vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, tile, - &frame_mv[NEARESTMV][mbmi->ref_frame[0]], - &frame_mv[NEARMV][mbmi->ref_frame[0]], - i, 0, mi_row, mi_col); - if (has_second_rf) { - frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0; - vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, tile, - &frame_mv[NEARESTMV][mbmi->ref_frame[1]], - &frame_mv[NEARMV][mbmi->ref_frame[1]], - i, 1, mi_row, mi_col); + const int i = idy * 2 + idx; + int ref; + + for (ref = 0; ref < 1 + has_second_rf; ++ref) { + const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref]; + frame_mv[ZEROMV][frame].as_int = 0; + vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col, + &frame_mv[NEARESTMV][frame], + &frame_mv[NEARMV][frame]); } + // search for the best motion vector on this segment for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { const struct buf_2d orig_src = x->plane[0].src; @@ -1736,9 +1734,12 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, mode_idx = INTER_OFFSET(this_mode); bsi->rdstat[i][mode_idx].brdcost = INT64_MAX; + if (disable_inter_mode_mask & (1 << mode_idx)) + continue; // if we're near/nearest and mv == 0,0, compare to zeromv - if ((this_mode == NEARMV || this_mode == NEARESTMV || + if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) && + (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) && frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 && (!has_second_rf || @@ -1783,11 +1784,12 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, // motion search for newmv (single predictor case only) if (!has_second_rf && this_mode == NEWMV && seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) { + int_mv *const new_mv = &mode_mv[NEWMV][0]; int step_param = 0; int further_steps; int thissme, bestsme = INT_MAX; int sadpb = x->sadperbit4; - int_mv mvp_full; + MV mvp_full; int max_mv; /* Is the best so far sufficiently good that we cant justify doing @@ -1795,7 +1797,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, if (best_rd < label_mv_thresh) break; - if (cpi->compressor_speed) { + if (cpi->oxcf.mode != MODE_SECONDPASS_BEST && + cpi->oxcf.mode != MODE_BESTQUALITY) { // use previous block's result as next block's MV predictor. if (i > 0) { bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int; @@ -1808,7 +1811,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, else max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3; - if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) { + if (cpi->sf.auto_mv_step_size && cm->show_frame) { // Take wtd average of the step_params based on the last frame's // max mv magnitude and the best ref mvs of the current block for // the given reference. @@ -1818,81 +1821,99 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, step_param = cpi->mv_step_param; } - mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3; - mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; + mvp_full.row = bsi->mvp.as_mv.row >> 3; + mvp_full.col = bsi->mvp.as_mv.col >> 3; - if (cpi->sf.adaptive_motion_search && cpi->common.show_frame) { - mvp_full.as_mv.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3; - mvp_full.as_mv.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3; + if (cpi->sf.adaptive_motion_search && cm->show_frame) { + mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3; + mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3; step_param = MAX(step_param, 8); } further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; // adjust src pointer for this block mi_buf_shift(x, i); + + vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv); + if (cpi->sf.search_method == HEX) { - bestsme = vp9_hex_search(x, &mvp_full.as_mv, + bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1, v_fn_ptr, 1, - &bsi->ref_mv->as_mv, - &mode_mv[NEWMV].as_mv); + &bsi->ref_mv[0]->as_mv, + &new_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv, + &bsi->ref_mv[0]->as_mv, + v_fn_ptr, 1); } else if (cpi->sf.search_method == SQUARE) { - bestsme = vp9_square_search(x, &mvp_full.as_mv, + bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1, v_fn_ptr, 1, - &bsi->ref_mv->as_mv, - &mode_mv[NEWMV].as_mv); + &bsi->ref_mv[0]->as_mv, + &new_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv, + &bsi->ref_mv[0]->as_mv, + v_fn_ptr, 1); } else if (cpi->sf.search_method == BIGDIA) { - bestsme = vp9_bigdia_search(x, &mvp_full.as_mv, + bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1, v_fn_ptr, 1, - &bsi->ref_mv->as_mv, - &mode_mv[NEWMV].as_mv); + &bsi->ref_mv[0]->as_mv, + &new_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv, + &bsi->ref_mv[0]->as_mv, + v_fn_ptr, 1); } else { bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, sadpb, further_steps, 0, v_fn_ptr, - bsi->ref_mv, &mode_mv[NEWMV]); + &bsi->ref_mv[0]->as_mv, + &new_mv->as_mv); } // Should we do a full search (best quality only) - if (cpi->compressor_speed == 0) { + if (cpi->oxcf.mode == MODE_BESTQUALITY || + cpi->oxcf.mode == MODE_SECONDPASS_BEST) { + int_mv *const best_mv = &mi->bmi[i].as_mv[0]; /* Check if mvp_full is within the range. */ - clamp_mv(&mvp_full.as_mv, x->mv_col_min, x->mv_col_max, + clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); - thissme = cpi->full_search_sad(x, &mvp_full, sadpb, 16, v_fn_ptr, x->nmvjointcost, x->mvcost, - bsi->ref_mv, i); - + &bsi->ref_mv[0]->as_mv, + &best_mv->as_mv); if (thissme < bestsme) { bestsme = thissme; - mode_mv[NEWMV].as_int = mi->bmi[i].as_mv[0].as_int; + new_mv->as_int = best_mv->as_int; } else { - /* The full search result is actually worse so re-instate the - * previous best vector */ - mi->bmi[i].as_mv[0].as_int = mode_mv[NEWMV].as_int; + // The full search result is actually worse so re-instate the + // previous best vector + best_mv->as_int = new_mv->as_int; } } if (bestsme < INT_MAX) { int distortion; - unsigned int sse; cpi->find_fractional_mv_step(x, - &mode_mv[NEWMV].as_mv, - &bsi->ref_mv->as_mv, - cpi->common.allow_high_precision_mv, + &new_mv->as_mv, + &bsi->ref_mv[0]->as_mv, + cm->allow_high_precision_mv, x->errorperbit, v_fn_ptr, - 0, cpi->sf.subpel_iters_per_step, + cpi->sf.subpel_force_stop, + cpi->sf.subpel_iters_per_step, x->nmvjointcost, x->mvcost, - &distortion, &sse); + &distortion, + &x->pred_sse[mbmi->ref_frame[0]]); // save motion search result for use in compound prediction - seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int; + seg_mvs[i][mbmi->ref_frame[0]].as_int = new_mv->as_int; } if (cpi->sf.adaptive_motion_search) - x->pred_mv[mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int; + x->pred_mv[mbmi->ref_frame[0]].as_int = new_mv->as_int; // restore src pointers mi_buf_restore(x, orig_src, orig_pre); @@ -1923,58 +1944,43 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, } bsi->rdstat[i][mode_idx].brate = - labels2mode(x, i, this_mode, &mode_mv[this_mode], - &second_mode_mv[this_mode], frame_mv, seg_mvs[i], - bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, - x->mvcost, cpi); - - - bsi->rdstat[i][mode_idx].mvs[0].as_int = mode_mv[this_mode].as_int; - if (num_4x4_blocks_wide > 1) - bsi->rdstat[i + 1][mode_idx].mvs[0].as_int = - mode_mv[this_mode].as_int; - if (num_4x4_blocks_high > 1) - bsi->rdstat[i + 2][mode_idx].mvs[0].as_int = - mode_mv[this_mode].as_int; - if (has_second_rf) { - bsi->rdstat[i][mode_idx].mvs[1].as_int = - second_mode_mv[this_mode].as_int; + labels2mode(cpi, xd, i, this_mode, mode_mv[this_mode], frame_mv, + seg_mvs[i], bsi->ref_mv, x->nmvjointcost, x->mvcost); + + for (ref = 0; ref < 1 + has_second_rf; ++ref) { + bsi->rdstat[i][mode_idx].mvs[ref].as_int = + mode_mv[this_mode][ref].as_int; if (num_4x4_blocks_wide > 1) - bsi->rdstat[i + 1][mode_idx].mvs[1].as_int = - second_mode_mv[this_mode].as_int; + bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int = + mode_mv[this_mode][ref].as_int; if (num_4x4_blocks_high > 1) - bsi->rdstat[i + 2][mode_idx].mvs[1].as_int = - second_mode_mv[this_mode].as_int; + bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int = + mode_mv[this_mode][ref].as_int; } // Trap vectors that reach beyond the UMV borders - if (mv_check_bounds(x, &mode_mv[this_mode])) - continue; - if (has_second_rf && - mv_check_bounds(x, &second_mode_mv[this_mode])) + if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) || + (has_second_rf && + mv_check_bounds(x, &mode_mv[this_mode][1].as_mv))) continue; if (filter_idx > 0) { BEST_SEG_INFO *ref_bsi = bsi_buf; - subpelmv = (mode_mv[this_mode].as_mv.row & 0x0f) || - (mode_mv[this_mode].as_mv.col & 0x0f); - have_ref = mode_mv[this_mode].as_int == - ref_bsi->rdstat[i][mode_idx].mvs[0].as_int; - if (has_second_rf) { - subpelmv |= (second_mode_mv[this_mode].as_mv.row & 0x0f) || - (second_mode_mv[this_mode].as_mv.col & 0x0f); - have_ref &= second_mode_mv[this_mode].as_int == - ref_bsi->rdstat[i][mode_idx].mvs[1].as_int; + subpelmv = 0; + have_ref = 1; + + for (ref = 0; ref < 1 + has_second_rf; ++ref) { + subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv); + have_ref &= mode_mv[this_mode][ref].as_int == + ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int; } if (filter_idx > 1 && !subpelmv && !have_ref) { ref_bsi = bsi_buf + 1; - have_ref = mode_mv[this_mode].as_int == - ref_bsi->rdstat[i][mode_idx].mvs[0].as_int; - if (has_second_rf) { - have_ref &= second_mode_mv[this_mode].as_int == - ref_bsi->rdstat[i][mode_idx].mvs[1].as_int; - } + have_ref = 1; + for (ref = 0; ref < 1 + has_second_rf; ++ref) + have_ref &= mode_mv[this_mode][ref].as_int == + ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int; } if (!subpelmv && have_ref && @@ -2003,16 +2009,17 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, &bsi->rdstat[i][mode_idx].bdist, &bsi->rdstat[i][mode_idx].bsse, bsi->rdstat[i][mode_idx].ta, - bsi->rdstat[i][mode_idx].tl); + bsi->rdstat[i][mode_idx].tl, + mi_row, mi_col); if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) { bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv, bsi->rdstat[i][mode_idx].brate, 0); bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate; - bsi->rdstat[i][mode_idx].eobs = pd->eobs[i]; + bsi->rdstat[i][mode_idx].eobs = p->eobs[i]; if (num_4x4_blocks_wide > 1) - bsi->rdstat[i + 1][mode_idx].eobs = pd->eobs[i + 1]; + bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1]; if (num_4x4_blocks_high > 1) - bsi->rdstat[i + 2][mode_idx].eobs = pd->eobs[i + 2]; + bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2]; } if (bsi->rdstat[i][mode_idx].brdcost < best_rd) { @@ -2034,10 +2041,9 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above)); vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left)); - labels2mode(x, i, mode_selected, &mode_mv[mode_selected], - &second_mode_mv[mode_selected], frame_mv, seg_mvs[i], - bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, - x->mvcost, cpi); + labels2mode(cpi, xd, i, mode_selected, mode_mv[mode_selected], + frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost, + x->mvcost); br += bsi->rdstat[i][mode_idx].brate; bd += bsi->rdstat[i][mode_idx].bdist; @@ -2063,8 +2069,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, bsi->sse = block_sse; // update the coding decisions - for (i = 0; i < 4; ++i) - bsi->modes[i] = mi->bmi[i].as_mode; + for (k = 0; k < 4; ++k) + bsi->modes[k] = mi->bmi[k].as_mode; } static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, @@ -2091,8 +2097,8 @@ static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, vp9_zero(*bsi); bsi->segment_rd = best_rd; - bsi->ref_mv = best_ref_mv; - bsi->second_ref_mv = second_best_ref_mv; + bsi->ref_mv[0] = best_ref_mv; + bsi->ref_mv[1] = second_best_ref_mv; bsi->mvp.as_int = best_ref_mv->as_int; bsi->mvthresh = mvthresh; @@ -2110,7 +2116,7 @@ static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int; if (has_second_ref(mbmi)) mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int; - xd->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs; + x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs; mi->bmi[i].as_mode = bsi->modes[i]; } @@ -2120,7 +2126,7 @@ static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, *returntotrate = bsi->r; *returndistortion = bsi->d; *returnyrate = bsi->segment_yrate; - *skippable = vp9_is_skippable_in_plane(&x->e_mbd, BLOCK_8X8, 0); + *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0); *psse = bsi->sse; mbmi->mode = bsi->modes[3]; @@ -2138,7 +2144,7 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, int best_index = 0; int best_sad = INT_MAX; int this_sad = INT_MAX; - unsigned int max_mv = 0; + int max_mv = 0; uint8_t *src_y_ptr = x->plane[0].src.buf; uint8_t *ref_y_ptr; @@ -2148,16 +2154,21 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, cpi->common.show_frame && block_size < cpi->sf.max_partition_size); + int_mv pred_mv[3]; + pred_mv[0] = mbmi->ref_mvs[ref_frame][0]; + pred_mv[1] = mbmi->ref_mvs[ref_frame][1]; + pred_mv[2] = x->pred_mv[ref_frame]; + // Get the sad for each candidate reference mv for (i = 0; i < num_mv_refs; i++) { - this_mv.as_int = (i < MAX_MV_REF_CANDIDATES) ? - mbmi->ref_mvs[ref_frame][i].as_int : x->pred_mv[ref_frame].as_int; + this_mv.as_int = pred_mv[i].as_int; max_mv = MAX(max_mv, MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3); - // The list is at an end if we see 0 for a second time. + // only need to check zero mv once if (!this_mv.as_int && zero_seen) - break; + continue; + zero_seen = zero_seen || !this_mv.as_int; row_offset = this_mv.as_mv.row >> 3; @@ -2179,6 +2190,7 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, // Note the index of the mv that worked best in the reference list. x->mv_best_ref_index[ref_frame] = best_index; x->max_mv_context[ref_frame] = max_mv; + x->pred_mv_sad[ref_frame] = best_sad; } static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, @@ -2194,11 +2206,11 @@ static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp)); *comp_mode_p = 128; } else { - vp9_prob intra_inter_p = vp9_get_pred_prob_intra_inter(cm, xd); + vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd); vp9_prob comp_inter_p = 128; - if (cm->comp_pred_mode == HYBRID_PREDICTION) { - comp_inter_p = vp9_get_pred_prob_comp_inter_inter(cm, xd); + if (cm->reference_mode == REFERENCE_MODE_SELECT) { + comp_inter_p = vp9_get_reference_mode_prob(cm, xd); *comp_mode_p = comp_inter_p; } else { *comp_mode_p = 128; @@ -2206,12 +2218,12 @@ static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0); - if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) { + if (cm->reference_mode != COMPOUND_REFERENCE) { vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd); vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd); unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1); - if (cm->comp_pred_mode == HYBRID_PREDICTION) + if (cm->reference_mode == REFERENCE_MODE_SELECT) base_cost += vp9_cost_bit(comp_inter_p, 0); ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] = @@ -2226,11 +2238,11 @@ static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, ref_costs_single[GOLDEN_FRAME] = 512; ref_costs_single[ALTREF_FRAME] = 512; } - if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) { + if (cm->reference_mode != SINGLE_REFERENCE) { vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd); unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1); - if (cm->comp_pred_mode == HYBRID_PREDICTION) + if (cm->reference_mode == REFERENCE_MODE_SELECT) base_cost += vp9_cost_bit(comp_inter_p, 1); ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0); @@ -2246,7 +2258,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index, int_mv *ref_mv, int_mv *second_ref_mv, - int64_t comp_pred_diff[NB_PREDICTION_TYPES], + int64_t comp_pred_diff[REFERENCE_MODES], int64_t tx_size_diff[TX_MODES], int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) { MACROBLOCKD *const xd = &x->e_mbd; @@ -2257,12 +2269,12 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, ctx->best_mode_index = mode_index; ctx->mic = *xd->mi_8x8[0]; - ctx->best_ref_mv.as_int = ref_mv->as_int; - ctx->second_best_ref_mv.as_int = second_ref_mv->as_int; + ctx->best_ref_mv[0].as_int = ref_mv->as_int; + ctx->best_ref_mv[1].as_int = second_ref_mv->as_int; - ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY]; - ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY]; - ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION]; + ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE]; + ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE]; + ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT]; vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff)); vpx_memcpy(ctx->best_filter_diff, best_filter_diff, @@ -2295,58 +2307,47 @@ static void setup_pred_block(const MACROBLOCKD *xd, } } -static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, - const TileInfo *const tile, - int idx, MV_REFERENCE_FRAME frame_type, - BLOCK_SIZE block_size, - int mi_row, int mi_col, - int_mv frame_nearest_mv[MAX_REF_FRAMES], - int_mv frame_near_mv[MAX_REF_FRAMES], - struct buf_2d yv12_mb[4][MAX_MB_PLANE], - struct scale_factors scale[MAX_REF_FRAMES]) { - VP9_COMMON *cm = &cpi->common; - YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]]; +void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, + const TileInfo *const tile, + MV_REFERENCE_FRAME ref_frame, + BLOCK_SIZE block_size, + int mi_row, int mi_col, + int_mv frame_nearest_mv[MAX_REF_FRAMES], + int_mv frame_near_mv[MAX_REF_FRAMES], + struct buf_2d yv12_mb[4][MAX_MB_PLANE]) { + const VP9_COMMON *cm = &cpi->common; + const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; - - // set up scaling factors - scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1]; - - scale[frame_type].sfc->set_scaled_offsets(&scale[frame_type], - mi_row * MI_SIZE, mi_col * MI_SIZE); + MODE_INFO *const mi = xd->mi_8x8[0]; + int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame]; + const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this // use the UV scaling factors. - setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col, - &scale[frame_type], &scale[frame_type]); + setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); // Gets an initial list of candidate vectors from neighbours and orders them - vp9_find_mv_refs(cm, xd, tile, xd->mi_8x8[0], - xd->last_mi, - frame_type, - mbmi->ref_mvs[frame_type], mi_row, mi_col); + vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col); // Candidate refinement carried out at encoder and decoder - vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, - mbmi->ref_mvs[frame_type], - &frame_nearest_mv[frame_type], - &frame_near_mv[frame_type]); + vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates, + &frame_nearest_mv[ref_frame], + &frame_near_mv[ref_frame]); // Further refinement that is encode side only to test the top few candidates // in full and choose the best as the centre point for subsequent searches. // The current implementation doesn't support scaling. - if (!vp9_is_scaled(scale[frame_type].sfc) && block_size >= BLOCK_8X8) - mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride, - frame_type, block_size); + if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8) + mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, + ref_frame, block_size); } -static YV12_BUFFER_CONFIG *get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) { - YV12_BUFFER_CONFIG *scaled_ref_frame = NULL; - int fb = get_ref_frame_idx(cpi, ref_frame); - int fb_scale = get_scale_ref_frame_idx(cpi, ref_frame); - if (cpi->scaled_ref_idx[fb_scale] != cpi->common.ref_frame_map[fb]) - scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb_scale]]; - return scaled_ref_frame; +const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, + int ref_frame) { + const VP9_COMMON *const cm = &cpi->common; + const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]; + const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1]; + return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL; } static INLINE int get_switchable_rate(const MACROBLOCK *x) { @@ -2369,17 +2370,22 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int bestsme = INT_MAX; int further_steps, step_param; int sadpb = x->sadperbit16; - int_mv mvp_full; + MV mvp_full; int ref = mbmi->ref_frame[0]; - int_mv ref_mv = mbmi->ref_mvs[ref][0]; - const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); + MV ref_mv = mbmi->ref_mvs[ref][0].as_mv; int tmp_col_min = x->mv_col_min; int tmp_col_max = x->mv_col_max; int tmp_row_min = x->mv_row_min; int tmp_row_max = x->mv_row_max; - YV12_BUFFER_CONFIG *scaled_ref_frame = get_scaled_ref_frame(cpi, ref); + const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, + ref); + + MV pred_mv[3]; + pred_mv[0] = mbmi->ref_mvs[ref][0].as_mv; + pred_mv[1] = mbmi->ref_mvs[ref][1].as_mv; + pred_mv[2] = x->pred_mv[ref].as_mv; if (scaled_ref_frame) { int i; @@ -2389,40 +2395,21 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; - setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); + vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); } - vp9_clamp_mv_min_max(x, &ref_mv.as_mv); - - // Adjust search parameters based on small partitions' result. - if (x->fast_ms) { - // && abs(mvp_full.as_mv.row - x->pred_mv.as_mv.row) < 24 && - // abs(mvp_full.as_mv.col - x->pred_mv.as_mv.col) < 24) { - // adjust search range - step_param = 6; - if (x->fast_ms > 1) - step_param = 8; - - // Get prediction MV. - mvp_full.as_int = x->pred_mv[ref].as_int; + vp9_set_mv_search_range(x, &ref_mv); - // Adjust MV sign if needed. - if (cm->ref_frame_sign_bias[ref]) { - mvp_full.as_mv.col *= -1; - mvp_full.as_mv.row *= -1; - } + // Work out the size of the first step in the mv step search. + // 0 here is maximum length first step. 1 is MAX >> 1 etc. + if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) { + // Take wtd average of the step_params based on the last frame's + // max mv magnitude and that based on the best ref mvs of the current + // block for the given reference. + step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) + + cpi->mv_step_param) >> 1; } else { - // Work out the size of the first step in the mv step search. - // 0 here is maximum length first step. 1 is MAX >> 1 etc. - if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) { - // Take wtd average of the step_params based on the last frame's - // max mv magnitude and that based on the best ref mvs of the current - // block for the given reference. - step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) + - cpi->mv_step_param) >> 1; - } else { - step_param = cpi->mv_step_param; - } + step_param = cpi->mv_step_param; } if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 && @@ -2432,39 +2419,78 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, step_param = MAX(step_param, boffset); } - mvp_full.as_int = x->mv_best_ref_index[ref] < MAX_MV_REF_CANDIDATES ? - mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int : - x->pred_mv[ref].as_int; + if (cpi->sf.adaptive_motion_search) { + int bwl = b_width_log2_lookup[bsize]; + int bhl = b_height_log2_lookup[bsize]; + int i; + int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4); + + if (tlevel < 5) + step_param += 2; - mvp_full.as_mv.col >>= 3; - mvp_full.as_mv.row >>= 3; + for (i = LAST_FRAME; i <= ALTREF_FRAME && cpi->common.show_frame; ++i) { + if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) { + x->pred_mv[ref].as_int = 0; + tmp_mv->as_int = INVALID_MV; + + if (scaled_ref_frame) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[0] = backup_yv12[i]; + } + return; + } + } + } + + mvp_full = pred_mv[x->mv_best_ref_index[ref]]; + + mvp_full.col >>= 3; + mvp_full.row >>= 3; // Further step/diamond searches as necessary further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - if (cpi->sf.search_method == HEX) { - bestsme = vp9_hex_search(x, &mvp_full.as_mv, - step_param, - sadpb, 1, - &cpi->fn_ptr[block_size], 1, - &ref_mv.as_mv, &tmp_mv->as_mv); + if (cpi->sf.search_method == FAST_DIAMOND) { + bestsme = vp9_fast_dia_search(x, &mvp_full, step_param, sadpb, 0, + &cpi->fn_ptr[bsize], 1, + &ref_mv, &tmp_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, + &cpi->fn_ptr[bsize], 1); + } else if (cpi->sf.search_method == FAST_HEX) { + bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0, + &cpi->fn_ptr[bsize], 1, + &ref_mv, &tmp_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, + &cpi->fn_ptr[bsize], 1); + } else if (cpi->sf.search_method == HEX) { + bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1, + &cpi->fn_ptr[bsize], 1, + &ref_mv, &tmp_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, + &cpi->fn_ptr[bsize], 1); } else if (cpi->sf.search_method == SQUARE) { - bestsme = vp9_square_search(x, &mvp_full.as_mv, - step_param, - sadpb, 1, - &cpi->fn_ptr[block_size], 1, - &ref_mv.as_mv, &tmp_mv->as_mv); + bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1, + &cpi->fn_ptr[bsize], 1, + &ref_mv, &tmp_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, + &cpi->fn_ptr[bsize], 1); } else if (cpi->sf.search_method == BIGDIA) { - bestsme = vp9_bigdia_search(x, &mvp_full.as_mv, - step_param, - sadpb, 1, - &cpi->fn_ptr[block_size], 1, - &ref_mv.as_mv, &tmp_mv->as_mv); + bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1, + &cpi->fn_ptr[bsize], 1, + &ref_mv, &tmp_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, + &cpi->fn_ptr[bsize], 1); } else { bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, sadpb, further_steps, 1, - &cpi->fn_ptr[block_size], - &ref_mv, tmp_mv); + &cpi->fn_ptr[bsize], + &ref_mv, &tmp_mv->as_mv); } x->mv_col_min = tmp_col_min; @@ -2474,16 +2500,16 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, if (bestsme < INT_MAX) { int dis; /* TODO: use dis in distortion calculation later. */ - unsigned int sse; - cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv.as_mv, + cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv, cm->allow_high_precision_mv, x->errorperbit, - &cpi->fn_ptr[block_size], - 0, cpi->sf.subpel_iters_per_step, + &cpi->fn_ptr[bsize], + cpi->sf.subpel_force_stop, + cpi->sf.subpel_iters_per_step, x->nmvjointcost, x->mvcost, - &dis, &sse); + &dis, &x->pred_sse[ref]); } - *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv, + *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); if (cpi->sf.adaptive_motion_search && cpi->common.show_frame) @@ -2502,13 +2528,13 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, int_mv single_newmv[MAX_REF_FRAMES], int *rate_mv) { - int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize); + const int pw = 4 * num_4x4_blocks_wide_lookup[bsize]; + const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] }; int_mv ref_mv[2]; - const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); int ite, ref; // Prediction buffer from second frame. uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t)); @@ -2517,9 +2543,9 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, struct buf_2d backup_yv12[2][MAX_MB_PLANE]; struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0]; int last_besterr[2] = {INT_MAX, INT_MAX}; - YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = { - get_scaled_ref_frame(cpi, mbmi->ref_frame[0]), - get_scaled_ref_frame(cpi, mbmi->ref_frame[1]) + const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = { + vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]), + vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1]) }; for (ref = 0; ref < 2; ++ref) { @@ -2532,11 +2558,10 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, // motion search code to be used without additional modifications. for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[ref][i] = xd->plane[i].pre[ref]; - setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, NULL); + vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, + NULL); } - xd->scale_factor[ref].sfc->set_scaled_offsets(&xd->scale_factor[ref], - mi_row, mi_col); frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int; } @@ -2564,14 +2589,15 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, ref_yv12[!id].stride, second_pred, pw, &frame_mv[refs[!id]].as_mv, - &xd->scale_factor[!id], + &xd->block_refs[!id]->sf, pw, ph, 0, - &xd->subpix, MV_PRECISION_Q3); + xd->interp_kernel, MV_PRECISION_Q3, + mi_col * MI_SIZE, mi_row * MI_SIZE); // Compound motion search on first ref frame. if (id) xd->plane[0].pre[0] = ref_yv12[id]; - vp9_clamp_mv_min_max(x, &ref_mv[id].as_mv); + vp9_set_mv_search_range(x, &ref_mv[id].as_mv); // Use mv result from single mode as mvp. tmp_mv.as_int = frame_mv[refs[id]].as_int; @@ -2580,12 +2606,15 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, tmp_mv.as_mv.row >>= 3; // Small-range full-pixel motion search - bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, + bestsme = vp9_refining_search_8p_c(x, &tmp_mv.as_mv, sadpb, search_range, - &cpi->fn_ptr[block_size], + &cpi->fn_ptr[bsize], x->nmvjointcost, x->mvcost, - &ref_mv[id], second_pred, + &ref_mv[id].as_mv, second_pred, pw, ph); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_av_var(x, &tmp_mv.as_mv, &ref_mv[id].as_mv, + second_pred, &cpi->fn_ptr[bsize], 1); x->mv_col_min = tmp_col_min; x->mv_col_max = tmp_col_max; @@ -2595,13 +2624,12 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, if (bestsme < INT_MAX) { int dis; /* TODO: use dis in distortion calculation later. */ unsigned int sse; - bestsme = cpi->find_fractional_mv_step_comp( x, &tmp_mv.as_mv, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, - &cpi->fn_ptr[block_size], + &cpi->fn_ptr[bsize], 0, cpi->sf.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, @@ -2637,6 +2665,16 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, vpx_free(second_pred); } +static INLINE void restore_dst_buf(MACROBLOCKD *xd, + uint8_t *orig_dst[MAX_MB_PLANE], + int orig_dst_stride[MAX_MB_PLANE]) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = orig_dst[i]; + xd->plane[i].dst.stride = orig_dst_stride[i]; + } +} + static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, BLOCK_SIZE bsize, @@ -2646,7 +2684,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate_y, int64_t *distortion_y, int *rate_uv, int64_t *distortion_uv, int *mode_excluded, int *disable_skip, - INTERPOLATION_TYPE *best_filter, + INTERP_FILTER *best_filter, int_mv (*mode_mv)[MAX_REF_FRAMES], int mi_row, int mi_col, int_mv single_newmv[MAX_REF_FRAMES], @@ -2702,6 +2740,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int_mv tmp_mv; single_motion_search(cpi, x, tile, bsize, mi_row, mi_col, &tmp_mv, &rate_mv); + if (tmp_mv.as_int == INVALID_MV) + return INT64_MAX; *rate2 += rate_mv; frame_mv[refs[0]].as_int = xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int; @@ -2709,49 +2749,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } - // if we're near/nearest and mv == 0,0, compare to zeromv - if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) && - frame_mv[refs[0]].as_int == 0 && - !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) && - (num_refs == 1 || frame_mv[refs[1]].as_int == 0)) { - int rfc = mbmi->mode_context[mbmi->ref_frame[0]]; - int c1 = cost_mv_ref(cpi, NEARMV, rfc); - int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); - int c3 = cost_mv_ref(cpi, ZEROMV, rfc); - - if (this_mode == NEARMV) { - if (c1 > c3) - return INT64_MAX; - } else if (this_mode == NEARESTMV) { - if (c2 > c3) - return INT64_MAX; - } else { - assert(this_mode == ZEROMV); - if (num_refs == 1) { - if ((c3 >= c2 && - mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) || - (c3 >= c1 && - mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0)) - return INT64_MAX; - } else { - if ((c3 >= c2 && - mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 && - mode_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) || - (c3 >= c1 && - mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 && - mode_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0)) - return INT64_MAX; - } - } - } - for (i = 0; i < num_refs; ++i) { cur_mv[i] = frame_mv[refs[i]]; // Clip "next_nearest" so that it does not extend to far out of image if (this_mode != NEWMV) clamp_mv2(&cur_mv[i].as_mv, xd); - if (mv_check_bounds(x, &cur_mv[i])) + if (mv_check_bounds(x, &cur_mv[i].as_mv)) return INT64_MAX; mbmi->mv[i].as_int = cur_mv[i].as_int; } @@ -2770,67 +2774,59 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other * words if you present them in that order, the second one is always known * if the first is known */ - *rate2 += cost_mv_ref(cpi, this_mode, - mbmi->mode_context[mbmi->ref_frame[0]]); + *rate2 += cost_mv_ref(cpi, this_mode, mbmi->mode_context[refs[0]]); - if (!(*mode_excluded)) { - if (is_comp_pred) { - *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY); - } else { - *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY); - } - } + if (!(*mode_excluded)) + *mode_excluded = is_comp_pred ? cm->reference_mode == SINGLE_REFERENCE + : cm->reference_mode == COMPOUND_REFERENCE; pred_exists = 0; // Are all MVs integer pel for Y and UV - intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 && - (mbmi->mv[0].as_mv.col & 15) == 0; + intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv); if (is_comp_pred) - intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 && - (mbmi->mv[1].as_mv.col & 15) == 0; + intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv); + // Search for best switchable filter by checking the variance of // pred error irrespective of whether the filter will be used - if (cm->mcomp_filter_type != BILINEAR) { + cpi->mask_filter_rd = 0; + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) + cpi->rd_filter_cache[i] = INT64_MAX; + + if (cm->interp_filter != BILINEAR) { *best_filter = EIGHTTAP; if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) { *best_filter = EIGHTTAP; - vp9_zero(cpi->rd_filter_cache); } else { - int i, newbest; + int newbest; int tmp_rate_sum = 0; int64_t tmp_dist_sum = 0; - cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX; for (i = 0; i < SWITCHABLE_FILTERS; ++i) { int j; int64_t rs_rd; mbmi->interp_filter = i; - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); rs = get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); if (i > 0 && intpel_mv) { - cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv, - tmp_rate_sum, tmp_dist_sum); + rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum); + cpi->rd_filter_cache[i] = rd; cpi->rd_filter_cache[SWITCHABLE_FILTERS] = - MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], - cpi->rd_filter_cache[i] + rs_rd); - rd = cpi->rd_filter_cache[i]; - if (cm->mcomp_filter_type == SWITCHABLE) + MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); + if (cm->interp_filter == SWITCHABLE) rd += rs_rd; + cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd); } else { int rate_sum = 0; int64_t dist_sum = 0; - if ((cm->mcomp_filter_type == SWITCHABLE && + if ((cm->interp_filter == SWITCHABLE && (!i || best_needs_copy)) || - (cm->mcomp_filter_type != SWITCHABLE && - (cm->mcomp_filter_type == mbmi->interp_filter || + (cm->interp_filter != SWITCHABLE && + (cm->interp_filter == mbmi->interp_filter || (i == 0 && intpel_mv)))) { - for (j = 0; j < MAX_MB_PLANE; j++) { - xd->plane[j].dst.buf = orig_dst[j]; - xd->plane[j].dst.stride = orig_dst_stride[j]; - } + restore_dst_buf(xd, orig_dst, orig_dst_stride); } else { for (j = 0; j < MAX_MB_PLANE; j++) { xd->plane[j].dst.buf = tmp_buf + j * 64 * 64; @@ -2839,25 +2835,24 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum); - cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv, - rate_sum, dist_sum); + + rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum); + cpi->rd_filter_cache[i] = rd; cpi->rd_filter_cache[SWITCHABLE_FILTERS] = - MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], - cpi->rd_filter_cache[i] + rs_rd); - rd = cpi->rd_filter_cache[i]; - if (cm->mcomp_filter_type == SWITCHABLE) + MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); + if (cm->interp_filter == SWITCHABLE) rd += rs_rd; + cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd); + if (i == 0 && intpel_mv) { tmp_rate_sum = rate_sum; tmp_dist_sum = dist_sum; } } + if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) { if (rd / 2 > ref_best_rd) { - for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].dst.buf = orig_dst[i]; - xd->plane[i].dst.stride = orig_dst_stride[i]; - } + restore_dst_buf(xd, orig_dst, orig_dst_stride); return INT64_MAX; } } @@ -2866,28 +2861,24 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (newbest) { best_rd = rd; *best_filter = mbmi->interp_filter; - if (cm->mcomp_filter_type == SWITCHABLE && i && !intpel_mv) + if (cm->interp_filter == SWITCHABLE && i && !intpel_mv) best_needs_copy = !best_needs_copy; } - if ((cm->mcomp_filter_type == SWITCHABLE && newbest) || - (cm->mcomp_filter_type != SWITCHABLE && - cm->mcomp_filter_type == mbmi->interp_filter)) { + if ((cm->interp_filter == SWITCHABLE && newbest) || + (cm->interp_filter != SWITCHABLE && + cm->interp_filter == mbmi->interp_filter)) { pred_exists = 1; } } - - for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].dst.buf = orig_dst[i]; - xd->plane[i].dst.stride = orig_dst_stride[i]; - } + restore_dst_buf(xd, orig_dst, orig_dst_stride); } } // Set the appropriate filter - mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ? - cm->mcomp_filter_type : *best_filter; - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); - rs = cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(x) : 0; + mbmi->interp_filter = cm->interp_filter != SWITCHABLE ? + cm->interp_filter : *best_filter; + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); + rs = cm->interp_filter == SWITCHABLE ? get_switchable_rate(x) : 0; if (pred_exists) { if (best_needs_copy) { @@ -2903,7 +2894,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); } - if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) { int tmp_rate; int64_t tmp_dist; @@ -2912,44 +2902,37 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // if current pred_error modeled rd is substantially more than the best // so far, do not bother doing full rd if (rd / 2 > ref_best_rd) { - for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].dst.buf = orig_dst[i]; - xd->plane[i].dst.stride = orig_dst_stride[i]; - } + restore_dst_buf(xd, orig_dst, orig_dst_stride); return INT64_MAX; } } - if (cpi->common.mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) *rate2 += get_switchable_rate(x); - if (!is_comp_pred && cpi->enable_encode_breakout) { - if (cpi->active_map_enabled && x->active_ptr[0] == 0) + if (!is_comp_pred) { + if (!x->in_active_map) { + if (psse) + *psse = 0; + *distortion = 0; x->skip = 1; - else if (x->encode_breakout) { + } else if (cpi->allow_encode_breakout && x->encode_breakout) { const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]); const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]); unsigned int var, sse; // Skipping threshold for ac. unsigned int thresh_ac; - // The encode_breakout input - unsigned int encode_breakout = x->encode_breakout << 4; - unsigned int max_thresh = 36000; - + // Set a maximum for threshold to avoid big PSNR loss in low bitrate case. // Use extreme low threshold for static frames to limit skipping. - if (cpi->enable_encode_breakout == 2) - max_thresh = 128; + const unsigned int max_thresh = (cpi->allow_encode_breakout == + ENCODE_BREAKOUT_LIMITED) ? 128 : 36000; + // The encode_breakout input + const unsigned int min_thresh = + MIN(((unsigned int)x->encode_breakout << 4), max_thresh); // Calculate threshold according to dequant value. thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9; - - // Use encode_breakout input if it is bigger than internal threshold. - if (thresh_ac < encode_breakout) - thresh_ac = encode_breakout; - - // Set a maximum for threshold to avoid big PSNR loss in low bitrate case. - if (thresh_ac > max_thresh) - thresh_ac = max_thresh; + thresh_ac = clamp(thresh_ac, min_thresh, max_thresh); var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf, @@ -2990,7 +2973,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, x->skip = 1; // The cost of skip bit needs to be added. - *rate2 += vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1); + *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); // Scaling factor for SSE from spatial domain to frequency domain // is 16. Adjust distortion accordingly. @@ -3012,16 +2995,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t rdcosty = INT64_MAX; // Y cost and distortion - super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse, - bsize, txfm_cache, ref_best_rd); + inter_super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse, + bsize, txfm_cache, ref_best_rd); if (*rate_y == INT_MAX) { *rate2 = INT_MAX; *distortion = INT64_MAX; - for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].dst.buf = orig_dst[i]; - xd->plane[i].dst.stride = orig_dst_stride[i]; - } + restore_dst_buf(xd, orig_dst, orig_dst_stride); return INT64_MAX; } @@ -3036,10 +3016,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (*rate_uv == INT_MAX) { *rate2 = INT_MAX; *distortion = INT64_MAX; - for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].dst.buf = orig_dst[i]; - xd->plane[i].dst.stride = orig_dst_stride[i]; - } + restore_dst_buf(xd, orig_dst, orig_dst_stride); return INT64_MAX; } @@ -3049,11 +3026,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, *skippable = skippable_y && skippable_uv; } - for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].dst.buf = orig_dst[i]; - xd->plane[i].dst.stride = orig_dst_stride[i]; - } - + restore_dst_buf(xd, orig_dst, orig_dst_stride); return this_rd; // if 0, this will be re-calculated by caller } @@ -3065,9 +3038,9 @@ static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, for (i = 0; i < max_plane; ++i) { p[i].coeff = ctx->coeff_pbuf[i][1]; - pd[i].qcoeff = ctx->qcoeff_pbuf[i][1]; + p[i].qcoeff = ctx->qcoeff_pbuf[i][1]; pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; - pd[i].eobs = ctx->eobs_pbuf[i][1]; + p[i].eobs = ctx->eobs_pbuf[i][1]; ctx->coeff_pbuf[i][1] = ctx->coeff_pbuf[i][0]; ctx->qcoeff_pbuf[i][1] = ctx->qcoeff_pbuf[i][0]; @@ -3075,9 +3048,9 @@ static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, ctx->eobs_pbuf[i][1] = ctx->eobs_pbuf[i][0]; ctx->coeff_pbuf[i][0] = p[i].coeff; - ctx->qcoeff_pbuf[i][0] = pd[i].qcoeff; + ctx->qcoeff_pbuf[i][0] = p[i].qcoeff; ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff; - ctx->eobs_pbuf[i][0] = pd[i].eobs; + ctx->eobs_pbuf[i][0] = p[i].eobs; } } @@ -3090,9 +3063,11 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0; int y_skip = 0, uv_skip = 0; int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 }; + TX_SIZE max_uv_tx_size; x->skip_encode = 0; ctx->skip = 0; xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME; + if (bsize >= BLOCK_8X8) { if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y, &y_skip, bsize, tx_cache, @@ -3100,8 +3075,9 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = INT_MAX; return; } + max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, - &dist_uv, &uv_skip, bsize); + &dist_uv, &uv_skip, bsize, max_uv_tx_size); } else { y_skip = 0; if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly, @@ -3109,19 +3085,19 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = INT_MAX; return; } + max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, - &dist_uv, &uv_skip, BLOCK_8X8); + &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size); } if (y_skip && uv_skip) { *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + - vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1); + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); *returndist = dist_y + dist_uv; vp9_zero(ctx->tx_rd_diff); } else { int i; - *returnrate = rate_y + rate_uv + - vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 0); + *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); *returndist = dist_y + dist_uv; if (cpi->sf.tx_size_search_method == USE_FULL_RD) for (i = 0; i < TX_MODES; i++) { @@ -3143,10 +3119,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; - const struct segmentation *seg = &cm->seg; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + const struct segmentation *const seg = &cm->seg; const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); MB_PREDICTION_MODE this_mode; MV_REFERENCE_FRAME ref_frame, second_ref_frame; @@ -3157,19 +3133,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } }; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; - int idx_list[4] = {0, - cpi->lst_fb_idx, - cpi->gld_fb_idx, - cpi->alt_fb_idx}; int64_t best_rd = best_rd_so_far; int64_t best_tx_rd[TX_MODES]; int64_t best_tx_diff[TX_MODES]; - int64_t best_pred_diff[NB_PREDICTION_TYPES]; - int64_t best_pred_rd[NB_PREDICTION_TYPES]; + int64_t best_pred_diff[REFERENCE_MODES]; + int64_t best_pred_rd[REFERENCE_MODES]; int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; MB_MODE_INFO best_mbmode = { 0 }; - int j; int mode_index, best_mode_index = 0; unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; vp9_prob comp_mode_p; @@ -3177,31 +3148,31 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_inter_rd = INT64_MAX; MB_PREDICTION_MODE best_intra_mode = DC_PRED; MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME; - INTERPOLATION_TYPE tmp_best_filter = SWITCHABLE; + INTERP_FILTER tmp_best_filter = SWITCHABLE; int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES]; int64_t dist_uv[TX_SIZES]; int skip_uv[TX_SIZES]; MB_PREDICTION_MODE mode_uv[TX_SIZES]; - struct scale_factors scale_factor[4]; - unsigned int ref_frame_mask = 0; - unsigned int mode_mask = 0; int64_t mode_distortions[MB_MODE_COUNT] = {-1}; - int64_t frame_distortions[MAX_REF_FRAMES] = {-1}; int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q); const int bws = num_8x8_blocks_wide_lookup[bsize] / 2; const int bhs = num_8x8_blocks_high_lookup[bsize] / 2; int best_skip2 = 0; + int mode_skip_mask = 0; + int mode_skip_start = cpi->sf.mode_skip_start + 1; + const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize]; + const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize]; + const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags; + const int intra_y_mode_mask = + cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]]; + int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize]; x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; - // Everywhere the flag is set the error is much higher than its neighbors. - ctx->frames_with_high_error = 0; - ctx->modes_with_high_error = 0; - estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp, &comp_mode_p); - for (i = 0; i < NB_PREDICTION_TYPES; ++i) + for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX; for (i = 0; i < TX_MODES; i++) best_tx_rd[i] = INT64_MAX; @@ -3209,51 +3180,106 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, best_filter_rd[i] = INT64_MAX; for (i = 0; i < TX_SIZES; i++) rate_uv_intra[i] = INT_MAX; + for (i = 0; i < MAX_REF_FRAMES; ++i) + x->pred_sse[i] = INT_MAX; *returnrate = INT_MAX; - // Create a mask set to 1 for each reference frame used by a smaller - // resolution. - if (cpi->sf.use_avoid_tested_higherror) { - switch (block_size) { - case BLOCK_64X64: - for (i = 0; i < 4; i++) { - for (j = 0; j < 4; j++) { - ref_frame_mask |= x->mb_context[i][j].frames_with_high_error; - mode_mask |= x->mb_context[i][j].modes_with_high_error; - } - } - for (i = 0; i < 4; i++) { - ref_frame_mask |= x->sb32_context[i].frames_with_high_error; - mode_mask |= x->sb32_context[i].modes_with_high_error; - } - break; - case BLOCK_32X32: - for (i = 0; i < 4; i++) { - ref_frame_mask |= - x->mb_context[x->sb_index][i].frames_with_high_error; - mode_mask |= x->mb_context[x->sb_index][i].modes_with_high_error; + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { + x->pred_mv_sad[ref_frame] = INT_MAX; + if (cpi->ref_frame_flags & flag_list[ref_frame]) { + vp9_setup_buffer_inter(cpi, x, tile, + ref_frame, block_size, mi_row, mi_col, + frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); + } + frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; + frame_mv[ZEROMV][ref_frame].as_int = 0; + } + + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { + // All modes from vp9_mode_order that use this frame as any ref + static const int ref_frame_mask_all[] = { + 0x0, 0x123291, 0x25c444, 0x39b722 + }; + // Fixed mv modes (NEARESTMV, NEARMV, ZEROMV) from vp9_mode_order that use + // this frame as their primary ref + static const int ref_frame_mask_fixedmv[] = { + 0x0, 0x121281, 0x24c404, 0x080102 + }; + if (!(cpi->ref_frame_flags & flag_list[ref_frame])) { + // Skip modes for missing references + mode_skip_mask |= ref_frame_mask_all[ref_frame]; + } else if (cpi->sf.reference_masking) { + for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { + // Skip fixed mv modes for poor references + if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) { + mode_skip_mask |= ref_frame_mask_fixedmv[ref_frame]; + break; } - break; - default: - // Until we handle all block sizes set it to present; - ref_frame_mask = 0; - mode_mask = 0; - break; + } + } + // If the segment reference frame feature is enabled.... + // then do nothing if the current ref frame is not allowed.. + if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && + vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { + mode_skip_mask |= ref_frame_mask_all[ref_frame]; } - ref_frame_mask = ~ref_frame_mask; - mode_mask = ~mode_mask; } - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { - if (cpi->ref_frame_flags & flag_list[ref_frame]) { - setup_buffer_inter(cpi, x, tile, idx_list[ref_frame], ref_frame, - block_size, mi_row, mi_col, - frame_mv[NEARESTMV], frame_mv[NEARMV], - yv12_mb, scale_factor); + // If the segment skip feature is enabled.... + // then do nothing if the current mode is not allowed.. + if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) { + const int inter_non_zero_mode_mask = 0x1F7F7; + mode_skip_mask |= inter_non_zero_mode_mask; + } + + // Disable this drop out case if the ref frame + // segment level feature is enabled for this segment. This is to + // prevent the possibility that we end up unable to pick any mode. + if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) { + // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, + // unless ARNR filtering is enabled in which case we want + // an unfiltered alternative. We allow near/nearest as well + // because they may result in zero-zero MVs but be cheaper. + if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { + const int altref_zero_mask = + ~((1 << THR_NEARESTA) | (1 << THR_NEARA) | (1 << THR_ZEROA)); + mode_skip_mask |= altref_zero_mask; + if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0) + mode_skip_mask |= (1 << THR_NEARA); + if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0) + mode_skip_mask |= (1 << THR_NEARESTA); } - frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; - frame_mv[ZEROMV][ref_frame].as_int = 0; + } + + // TODO(JBB): This is to make up for the fact that we don't have sad + // functions that work when the block size reads outside the umv. We + // should fix this either by making the motion search just work on + // a representative block in the boundary ( first ) and then implement a + // function that does sads when inside the border.. + if ((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) { + const int new_modes_mask = + (1 << THR_NEWMV) | (1 << THR_NEWG) | (1 << THR_NEWA) | + (1 << THR_COMP_NEWLA) | (1 << THR_COMP_NEWGA); + mode_skip_mask |= new_modes_mask; + } + + if (bsize > cpi->sf.max_intra_bsize) { + mode_skip_mask |= 0xFF30808; + } + + if (!x->in_active_map) { + int mode_index; + assert(cpi->ref_frame_flags & VP9_LAST_FLAG); + if (frame_mv[NEARESTMV][LAST_FRAME].as_int == 0) + mode_index = THR_NEARESTMV; + else if (frame_mv[NEARMV][LAST_FRAME].as_int == 0) + mode_index = THR_NEARMV; + else + mode_index = THR_ZEROMV; + mode_skip_mask = ~(1 << mode_index); + mode_skip_start = MAX_MODES; + disable_inter_mode_mask = 0; } for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { @@ -3267,125 +3293,137 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t tx_cache[TX_MODES]; int i; int this_skip2 = 0; - int64_t total_sse = INT_MAX; + int64_t total_sse = INT64_MAX; int early_term = 0; - for (i = 0; i < TX_MODES; ++i) - tx_cache[i] = INT64_MAX; - - x->skip = 0; - this_mode = vp9_mode_order[mode_index].mode; - ref_frame = vp9_mode_order[mode_index].ref_frame; - second_ref_frame = vp9_mode_order[mode_index].second_ref_frame; - // Look at the reference frame of the best mode so far and set the // skip mask to look at a subset of the remaining modes. - if (mode_index > cpi->sf.mode_skip_start) { - if (mode_index == (cpi->sf.mode_skip_start + 1)) { - switch (vp9_mode_order[best_mode_index].ref_frame) { - case INTRA_FRAME: - cpi->mode_skip_mask = 0; - break; - case LAST_FRAME: - cpi->mode_skip_mask = LAST_FRAME_MODE_MASK; - break; - case GOLDEN_FRAME: - cpi->mode_skip_mask = GOLDEN_FRAME_MODE_MASK; - break; - case ALTREF_FRAME: - cpi->mode_skip_mask = ALT_REF_MODE_MASK; - break; - case NONE: - case MAX_REF_FRAMES: - assert(!"Invalid Reference frame"); - } + if (mode_index == mode_skip_start) { + switch (vp9_mode_order[best_mode_index].ref_frame[0]) { + case INTRA_FRAME: + break; + case LAST_FRAME: + mode_skip_mask |= LAST_FRAME_MODE_MASK; + break; + case GOLDEN_FRAME: + mode_skip_mask |= GOLDEN_FRAME_MODE_MASK; + break; + case ALTREF_FRAME: + mode_skip_mask |= ALT_REF_MODE_MASK; + break; + case NONE: + case MAX_REF_FRAMES: + assert(0 && "Invalid Reference frame"); } - if (cpi->mode_skip_mask & ((int64_t)1 << mode_index)) - continue; } - - // Skip if the current reference frame has been masked off - if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask && - (cpi->ref_frame_mask & (1 << ref_frame))) + if (mode_skip_mask & (1 << mode_index)) continue; // Test best rd so far against threshold for trying this mode. - if ((best_rd < ((int64_t)cpi->rd_threshes[segment_id][bsize][mode_index] * - cpi->rd_thresh_freq_fact[bsize][mode_index] >> 5)) || - cpi->rd_threshes[segment_id][bsize][mode_index] == INT_MAX) - continue; - - // Do not allow compound prediction if the segment level reference - // frame feature is in use as in this case there can only be one reference. - if ((second_ref_frame > INTRA_FRAME) && - vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) - continue; + if (best_rd < ((int64_t)rd_threshes[mode_index] * + rd_thresh_freq_fact[mode_index] >> 5) || + rd_threshes[mode_index] == INT_MAX) + continue; - // Skip some checking based on small partitions' result. - if (x->fast_ms > 1 && !ref_frame) - continue; - if (x->fast_ms > 2 && ref_frame != x->subblock_ref) + this_mode = vp9_mode_order[mode_index].mode; + ref_frame = vp9_mode_order[mode_index].ref_frame[0]; + if (ref_frame != INTRA_FRAME && + disable_inter_mode_mask & (1 << INTER_OFFSET(this_mode))) continue; + second_ref_frame = vp9_mode_order[mode_index].ref_frame[1]; - if (cpi->sf.use_avoid_tested_higherror && bsize >= BLOCK_8X8) { - if (!(ref_frame_mask & (1 << ref_frame))) { + comp_pred = second_ref_frame > INTRA_FRAME; + if (comp_pred) { + if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && + vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME) continue; - } - if (!(mode_mask & (1 << this_mode))) { + if ((mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) && + ref_frame != best_inter_ref_frame && + second_ref_frame != best_inter_ref_frame) continue; - } - if (second_ref_frame != NONE - && !(ref_frame_mask & (1 << second_ref_frame))) { + mode_excluded = cm->reference_mode == SINGLE_REFERENCE; + } else { + if (ref_frame != INTRA_FRAME) + mode_excluded = cm->reference_mode == COMPOUND_REFERENCE; + } + + if (ref_frame == INTRA_FRAME) { + if (!(intra_y_mode_mask & (1 << this_mode))) continue; + if (this_mode != DC_PRED) { + // Disable intra modes other than DC_PRED for blocks with low variance + // Threshold for intra skipping based on source variance + // TODO(debargha): Specialize the threshold for super block sizes + const unsigned int skip_intra_var_thresh = 64; + if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) && + x->source_variance < skip_intra_var_thresh) + continue; + // Only search the oblique modes if the best so far is + // one of the neighboring directional modes + if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) && + (this_mode >= D45_PRED && this_mode <= TM_PRED)) { + if (vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME) + continue; + } + if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { + if (conditional_skipintra(this_mode, best_intra_mode)) + continue; + } + } + } else { + // TODO(aconverse): Find out if this is still productive then clean up or + // remove + // if we're near/nearest and mv == 0,0, compare to zeromv + if (x->in_active_map && + !(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) && + (this_mode == NEARMV || this_mode == NEARESTMV || + this_mode == ZEROMV) && + frame_mv[this_mode][ref_frame].as_int == 0 && + !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) && + (!comp_pred || frame_mv[this_mode][second_ref_frame].as_int == 0)) { + int rfc = mbmi->mode_context[ref_frame]; + int c1 = cost_mv_ref(cpi, NEARMV, rfc); + int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); + int c3 = cost_mv_ref(cpi, ZEROMV, rfc); + + if (this_mode == NEARMV) { + if (c1 > c3) + continue; + } else if (this_mode == NEARESTMV) { + if (c2 > c3) + continue; + } else { + assert(this_mode == ZEROMV); + if (!comp_pred) { + if ((c3 >= c2 && + frame_mv[NEARESTMV][ref_frame].as_int == 0) || + (c3 >= c1 && + frame_mv[NEARMV][ref_frame].as_int == 0)) + continue; + } else { + if ((c3 >= c2 && + frame_mv[NEARESTMV][ref_frame].as_int == 0 && + frame_mv[NEARESTMV][second_ref_frame].as_int == 0) || + (c3 >= c1 && + frame_mv[NEARMV][ref_frame].as_int == 0 && + frame_mv[NEARMV][second_ref_frame].as_int == 0)) + continue; + } + } } } + mbmi->mode = this_mode; + mbmi->uv_mode = x->in_active_map ? DC_PRED : this_mode; mbmi->ref_frame[0] = ref_frame; mbmi->ref_frame[1] = second_ref_frame; - - if (!(ref_frame == INTRA_FRAME - || (cpi->ref_frame_flags & flag_list[ref_frame]))) { - continue; - } - if (!(second_ref_frame == NONE - || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) { - continue; - } - - comp_pred = second_ref_frame > INTRA_FRAME; - if (comp_pred) { - if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) - if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) - continue; - if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) - if (ref_frame != best_inter_ref_frame && - second_ref_frame != best_inter_ref_frame) - continue; - } - - set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor); - mbmi->uv_mode = DC_PRED; - // Evaluate all sub-pel filters irrespective of whether we can use // them for this frame. - mbmi->interp_filter = cm->mcomp_filter_type; - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); - - if (comp_pred) { - if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) - continue; - set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor); - - mode_excluded = mode_excluded - ? mode_excluded - : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY; - } else { - if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) { - mode_excluded = - mode_excluded ? - mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY; - } - } + mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP + : cm->interp_filter; + x->skip = 0; + set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); // Select prediction reference frames. for (i = 0; i < MAX_MB_PLANE; i++) { @@ -3394,46 +3432,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; } - // If the segment reference frame feature is enabled.... - // then do nothing if the current ref frame is not allowed.. - if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && - vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != - (int)ref_frame) { - continue; - // If the segment skip feature is enabled.... - // then do nothing if the current mode is not allowed.. - } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) && - (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) { - continue; - // Disable this drop out case if the ref frame - // segment level feature is enabled for this segment. This is to - // prevent the possibility that we end up unable to pick any mode. - } else if (!vp9_segfeature_active(seg, segment_id, - SEG_LVL_REF_FRAME)) { - // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, - // unless ARNR filtering is enabled in which case we want - // an unfiltered alternative. We allow near/nearest as well - // because they may result in zero-zero MVs but be cheaper. - if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { - if ((this_mode != ZEROMV && - !(this_mode == NEARMV && - frame_mv[NEARMV][ALTREF_FRAME].as_int == 0) && - !(this_mode == NEARESTMV && - frame_mv[NEARESTMV][ALTREF_FRAME].as_int == 0)) || - ref_frame != ALTREF_FRAME) { - continue; - } - } - } - // TODO(JBB): This is to make up for the fact that we don't have sad - // functions that work when the block size reads outside the umv. We - // should fix this either by making the motion search just work on - // a representative block in the boundary ( first ) and then implement a - // function that does sads when inside the border.. - if (((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) && - this_mode == NEWMV) { - continue; - } + for (i = 0; i < TX_MODES; ++i) + tx_cache[i] = INT64_MAX; #ifdef MODE_TEST_HIT_STATS // TEST/DEBUG CODE @@ -3441,44 +3441,19 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, cpi->mode_test_hits[bsize]++; #endif - if (ref_frame == INTRA_FRAME) { TX_SIZE uv_tx; - // Disable intra modes other than DC_PRED for blocks with low variance - // Threshold for intra skipping based on source variance - // TODO(debargha): Specialize the threshold for super block sizes - static const unsigned int skip_intra_var_thresh[BLOCK_SIZES] = { - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - }; - if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) && - this_mode != DC_PRED && - x->source_variance < skip_intra_var_thresh[mbmi->sb_type]) - continue; - // Only search the oblique modes if the best so far is - // one of the neighboring directional modes - if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) && - (this_mode >= D45_PRED && this_mode <= TM_PRED)) { - if (vp9_mode_order[best_mode_index].ref_frame > INTRA_FRAME) - continue; - } - mbmi->mode = this_mode; - if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { - if (conditional_skipintra(mbmi->mode, best_intra_mode)) - continue; - } - - super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, - bsize, tx_cache, best_rd); + intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, + bsize, tx_cache, best_rd); if (rate_y == INT_MAX) continue; - uv_tx = MIN(mbmi->tx_size, max_uv_txsize_lookup[bsize]); + uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize); if (rate_uv_intra[uv_tx] == INT_MAX) { - choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[uv_tx], - &rate_uv_tokenonly[uv_tx], - &dist_uv[uv_tx], &skip_uv[uv_tx], - &mode_uv[uv_tx]); + choose_intra_uv_mode(cpi, ctx, bsize, uv_tx, + &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx], + &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]); } rate_uv = rate_uv_tokenonly[uv_tx]; @@ -3491,8 +3466,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, rate2 += intra_cost_penalty; distortion2 = distortion_y + distortion_uv; } else { - mbmi->mode = this_mode; - compmode_cost = vp9_cost_bit(comp_mode_p, second_ref_frame > INTRA_FRAME); this_rd = handle_inter_mode(cpi, x, tile, bsize, tx_cache, &rate2, &distortion2, &skippable, @@ -3504,15 +3477,16 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, single_newmv, &total_sse, best_rd); if (this_rd == INT64_MAX) continue; - } - if (cm->comp_pred_mode == HYBRID_PREDICTION) { - rate2 += compmode_cost; + compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred); + + if (cm->reference_mode == REFERENCE_MODE_SELECT) + rate2 += compmode_cost; } // Estimate the reference frame signaling cost and add it // to the rolling cost variable. - if (second_ref_frame > INTRA_FRAME) { + if (comp_pred) { rate2 += ref_costs_comp[ref_frame]; } else { rate2 += ref_costs_single[ref_frame]; @@ -3537,9 +3511,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int prob_skip_cost; // Cost the skip mb case - vp9_prob skip_prob = - vp9_get_pred_prob_mbskip(cm, xd); - + vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); if (skip_prob) { prob_skip_cost = vp9_cost_bit(skip_prob, 1); rate2 += prob_skip_cost; @@ -3549,14 +3521,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < RDCOST(x->rdmult, x->rddiv, 0, total_sse)) { // Add in the cost of the no skip flag. - int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), - 0); - rate2 += prob_skip_cost; + rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); } else { // FIXME(rbultje) make this work for splitmv also - int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), - 1); - rate2 += prob_skip_cost; + rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); distortion2 = total_sse; assert(total_sse >= 0); rate2 -= (rate_y + rate_uv); @@ -3566,32 +3534,29 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } else if (mb_skip_allowed) { // Add in the cost of the no skip flag. - int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), - 0); - rate2 += prob_skip_cost; + rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); } // Calculate the final RD estimate for this mode. this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); } + if (ref_frame == INTRA_FRAME) { // Keep record of best intra rd - if (!is_inter_block(&xd->mi_8x8[0]->mbmi) && - this_rd < best_intra_rd) { - best_intra_rd = this_rd; - best_intra_mode = xd->mi_8x8[0]->mbmi.mode; - } - - // Keep record of best inter rd with single reference - if (is_inter_block(&xd->mi_8x8[0]->mbmi) && - !has_second_ref(&xd->mi_8x8[0]->mbmi) && - !mode_excluded && this_rd < best_inter_rd) { - best_inter_rd = this_rd; - best_inter_ref_frame = ref_frame; + if (this_rd < best_intra_rd) { + best_intra_rd = this_rd; + best_intra_mode = mbmi->mode; + } + } else { + // Keep record of best inter rd with single reference + if (!comp_pred && !mode_excluded && this_rd < best_inter_rd) { + best_inter_rd = this_rd; + best_inter_ref_frame = ref_frame; + } } if (!disable_skip && ref_frame == INTRA_FRAME) { - for (i = 0; i < NB_PREDICTION_TYPES; ++i) + for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = MIN(best_pred_rd[i], this_rd); for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) best_filter_rd[i] = MIN(best_filter_rd[i], this_rd); @@ -3602,10 +3567,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, || distortion2 < mode_distortions[this_mode]) { mode_distortions[this_mode] = distortion2; } - if (frame_distortions[ref_frame] == -1 - || distortion2 < frame_distortions[ref_frame]) { - frame_distortions[ref_frame] = distortion2; - } // Did this mode help.. i.e. is it the new best mode if (this_rd < best_rd || x->skip) { @@ -3632,7 +3593,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // TODO(debargha): enhance this test with a better distortion prediction // based on qp, activity mask and history - if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) && + if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) && (mode_index > MIN_EARLY_TERM_INDEX)) { const int qstep = xd->plane[0].dequant[1]; // TODO(debargha): Enhance this by specializing for each mode_index @@ -3651,9 +3612,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, /* keep record of best compound/single-only prediction */ if (!disable_skip && ref_frame != INTRA_FRAME) { - int single_rd, hybrid_rd, single_rate, hybrid_rate; + int64_t single_rd, hybrid_rd, single_rate, hybrid_rate; - if (cm->comp_pred_mode == HYBRID_PREDICTION) { + if (cm->reference_mode == REFERENCE_MODE_SELECT) { single_rate = rate2 - compmode_cost; hybrid_rate = rate2; } else { @@ -3664,40 +3625,39 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); - if (second_ref_frame <= INTRA_FRAME && - single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) { - best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd; - } else if (second_ref_frame > INTRA_FRAME && - single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) { - best_pred_rd[COMP_PREDICTION_ONLY] = single_rd; + if (!comp_pred) { + if (single_rd < best_pred_rd[SINGLE_REFERENCE]) { + best_pred_rd[SINGLE_REFERENCE] = single_rd; + } + } else { + if (single_rd < best_pred_rd[COMPOUND_REFERENCE]) { + best_pred_rd[COMPOUND_REFERENCE] = single_rd; + } } - if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION]) - best_pred_rd[HYBRID_PREDICTION] = hybrid_rd; - } + if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT]) + best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd; + + /* keep record of best filter type */ + if (!mode_excluded && cm->interp_filter != BILINEAR) { + int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ? + SWITCHABLE_FILTERS : cm->interp_filter]; + + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { + int64_t adj_rd; + if (ref == INT64_MAX) + adj_rd = 0; + else if (cpi->rd_filter_cache[i] == INT64_MAX) + // when early termination is triggered, the encoder does not have + // access to the rate-distortion cost. it only knows that the cost + // should be above the maximum valid value. hence it takes the known + // maximum plus an arbitrary constant as the rate-distortion cost. + adj_rd = cpi->mask_filter_rd - ref + 10; + else + adj_rd = cpi->rd_filter_cache[i] - ref; - /* keep record of best filter type */ - if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME && - cm->mcomp_filter_type != BILINEAR) { - int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ? - SWITCHABLE_FILTERS : cm->mcomp_filter_type]; - for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { - int64_t adj_rd; - // In cases of poor prediction, filter_cache[] can contain really big - // values, which actually are bigger than this_rd itself. This can - // cause negative best_filter_rd[] values, which is obviously silly. - // Therefore, if filter_cache < ref, we do an adjusted calculation. - if (cpi->rd_filter_cache[i] >= ref) { - adj_rd = this_rd + cpi->rd_filter_cache[i] - ref; - } else { - // FIXME(rbultje) do this for comppsred also - // - // To prevent out-of-range computation in - // adj_rd = cpi->rd_filter_cache[i] * this_rd / ref - // cpi->rd_filter_cache[i] / ref is converted to a 256 based ratio. - int tmp = cpi->rd_filter_cache[i] * 256 / ref; - adj_rd = (this_rd * tmp) >> 8; + adj_rd += this_rd; + best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd); } - best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd); } } @@ -3731,43 +3691,22 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // If we used an estimate for the uv intra rd in the loop above... if (cpi->sf.use_uv_intra_rd_estimate) { // Do Intra UV best rd mode selection if best mode choice above was intra. - if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) { - TX_SIZE uv_tx_size = get_uv_tx_size(mbmi); + if (vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME) { + TX_SIZE uv_tx_size; + *mbmi = best_mbmode; + uv_tx_size = get_uv_tx_size(mbmi); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size], &rate_uv_tokenonly[uv_tx_size], &dist_uv[uv_tx_size], &skip_uv[uv_tx_size], - bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize); + bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, + uv_tx_size); } } - // If we are using reference masking and the set mask flag is set then - // create the reference frame mask. - if (cpi->sf.reference_masking && cpi->set_ref_frame_mask) - cpi->ref_frame_mask = ~(1 << vp9_mode_order[best_mode_index].ref_frame); - - // Flag all modes that have a distortion thats > 2x the best we found at - // this level. - for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) { - if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV) - continue; - - if (mode_distortions[mode_index] > 2 * *returndistortion) { - ctx->modes_with_high_error |= (1 << mode_index); - } - } - - // Flag all ref frames that have a distortion thats > 2x the best we found at - // this level. - for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { - if (frame_distortions[ref_frame] > 2 * *returndistortion) { - ctx->frames_with_high_error |= (1 << ref_frame); - } - } - - assert((cm->mcomp_filter_type == SWITCHABLE) || - (cm->mcomp_filter_type == best_mbmode.interp_filter) || - (best_mbmode.ref_frame[0] == INTRA_FRAME)); + assert((cm->interp_filter == SWITCHABLE) || + (cm->interp_filter == best_mbmode.interp_filter) || + !is_inter_block(&best_mbmode)); // Updating rd_thresh_freq_fact[] here means that the different // partition/block sizes are handled independently based on the best @@ -3776,16 +3715,13 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // combination that wins out. if (cpi->sf.adaptive_rd_thresh) { for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { + int *const fact = &cpi->rd_thresh_freq_fact[bsize][mode_index]; + if (mode_index == best_mode_index) { - cpi->rd_thresh_freq_fact[bsize][mode_index] -= - (cpi->rd_thresh_freq_fact[bsize][mode_index] >> 3); + *fact -= (*fact >> 3); } else { - cpi->rd_thresh_freq_fact[bsize][mode_index] += RD_THRESH_INC; - if (cpi->rd_thresh_freq_fact[bsize][mode_index] > - (cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT)) { - cpi->rd_thresh_freq_fact[bsize][mode_index] = - cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT; - } + *fact = MIN(*fact + RD_THRESH_INC, + cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); } } } @@ -3794,7 +3730,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *mbmi = best_mbmode; x->skip |= best_skip2; - for (i = 0; i < NB_PREDICTION_TYPES; ++i) { + for (i = 0; i < REFERENCE_MODES; ++i) { if (best_pred_rd[i] == INT64_MAX) best_pred_diff[i] = INT_MIN; else @@ -3808,13 +3744,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, else best_filter_diff[i] = best_rd - best_filter_rd[i]; } - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) assert(best_filter_diff[SWITCHABLE_FILTERS] == 0); - } else { - vp9_zero(best_filter_diff); - } - - if (!x->skip) { for (i = 0; i < TX_MODES; i++) { if (best_tx_rd[i] == INT64_MAX) best_tx_diff[i] = 0; @@ -3822,11 +3753,21 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, best_tx_diff[i] = best_rd - best_tx_rd[i]; } } else { + vp9_zero(best_filter_diff); vp9_zero(best_tx_diff); } - set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1], - scale_factor); + if (!x->in_active_map) { + assert(mbmi->ref_frame[0] == LAST_FRAME); + assert(mbmi->ref_frame[1] == NONE); + assert(mbmi->mode == NEARESTMV || + mbmi->mode == NEARMV || + mbmi->mode == ZEROMV); + assert(frame_mv[mbmi->mode][LAST_FRAME].as_int == 0); + assert(mbmi->mode == mbmi->uv_mode); + } + + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); store_coding_context(x, ctx, best_mode_index, &mbmi->ref_mvs[mbmi->ref_frame[0]][0], &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 : @@ -3857,16 +3798,12 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, struct buf_2d yv12_mb[4][MAX_MB_PLANE]; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; - int idx_list[4] = {0, - cpi->lst_fb_idx, - cpi->gld_fb_idx, - cpi->alt_fb_idx}; int64_t best_rd = best_rd_so_far; int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise int64_t best_tx_rd[TX_MODES]; int64_t best_tx_diff[TX_MODES]; - int64_t best_pred_diff[NB_PREDICTION_TYPES]; - int64_t best_pred_rd[NB_PREDICTION_TYPES]; + int64_t best_pred_diff[REFERENCE_MODES]; + int64_t best_pred_rd[REFERENCE_MODES]; int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; MB_MODE_INFO best_mbmode = { 0 }; @@ -3875,19 +3812,17 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, vp9_prob comp_mode_p; int64_t best_inter_rd = INT64_MAX; MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME; - INTERPOLATION_TYPE tmp_best_filter = SWITCHABLE; + INTERP_FILTER tmp_best_filter = SWITCHABLE; int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES]; int64_t dist_uv[TX_SIZES]; int skip_uv[TX_SIZES]; MB_PREDICTION_MODE mode_uv[TX_SIZES] = { 0 }; - struct scale_factors scale_factor[4]; - unsigned int ref_frame_mask = 0; - unsigned int mode_mask = 0; - int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex, - cpi->common.y_dc_delta_q); + int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q); int_mv seg_mvs[4][MAX_REF_FRAMES]; b_mode_info best_bmodes[4]; int best_skip2 = 0; + int ref_frame_mask = 0; + int mode_skip_mask = 0; x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4); @@ -3901,7 +3836,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp, &comp_mode_p); - for (i = 0; i < NB_PREDICTION_TYPES; ++i) + for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX; for (i = 0; i < TX_MODES; i++) best_tx_rd[i] = INT64_MAX; @@ -3912,26 +3847,28 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = INT_MAX; - // Create a mask set to 1 for each reference frame used by a smaller - // resolution. - if (cpi->sf.use_avoid_tested_higherror) { - ref_frame_mask = 0; - mode_mask = 0; - ref_frame_mask = ~ref_frame_mask; - mode_mask = ~mode_mask; - } - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { - setup_buffer_inter(cpi, x, tile, idx_list[ref_frame], ref_frame, - block_size, mi_row, mi_col, - frame_mv[NEARESTMV], frame_mv[NEARMV], - yv12_mb, scale_factor); + vp9_setup_buffer_inter(cpi, x, tile, + ref_frame, block_size, mi_row, mi_col, + frame_mv[NEARESTMV], frame_mv[NEARMV], + yv12_mb); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; } + for (ref_frame = LAST_FRAME; + ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) { + int i; + for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { + if ((x->pred_mv_sad[ref_frame] >> 1) > x->pred_mv_sad[i]) { + ref_frame_mask |= (1 << ref_frame); + break; + } + } + } + for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) { int mode_excluded = 0; int64_t this_rd = INT64_MAX; @@ -3950,40 +3887,35 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, tx_cache[i] = INT64_MAX; x->skip = 0; - ref_frame = vp9_ref_order[mode_index].ref_frame; - second_ref_frame = vp9_ref_order[mode_index].second_ref_frame; + ref_frame = vp9_ref_order[mode_index].ref_frame[0]; + second_ref_frame = vp9_ref_order[mode_index].ref_frame[1]; // Look at the reference frame of the best mode so far and set the // skip mask to look at a subset of the remaining modes. if (mode_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) { if (mode_index == 3) { - switch (vp9_ref_order[best_mode_index].ref_frame) { + switch (vp9_ref_order[best_mode_index].ref_frame[0]) { case INTRA_FRAME: - cpi->mode_skip_mask = 0; + mode_skip_mask = 0; break; case LAST_FRAME: - cpi->mode_skip_mask = 0x0010; + mode_skip_mask = 0x0010; break; case GOLDEN_FRAME: - cpi->mode_skip_mask = 0x0008; + mode_skip_mask = 0x0008; break; case ALTREF_FRAME: - cpi->mode_skip_mask = 0x0000; + mode_skip_mask = 0x0000; break; case NONE: case MAX_REF_FRAMES: - assert(!"Invalid Reference frame"); + assert(0 && "Invalid Reference frame"); } } - if (cpi->mode_skip_mask & ((int64_t)1 << mode_index)) + if (mode_skip_mask & (1 << mode_index)) continue; } - // Skip if the current reference frame has been masked off - if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask && - (cpi->ref_frame_mask & (1 << ref_frame))) - continue; - // Test best rd so far against threshold for trying this mode. if ((best_rd < ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] * @@ -4012,7 +3944,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, comp_pred = second_ref_frame > INTRA_FRAME; if (comp_pred) { if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) - if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME) + if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME) continue; if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) if (ref_frame != best_inter_ref_frame && @@ -4022,35 +3954,32 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // TODO(jingning, jkoleszar): scaling reference frame not supported for // sub8x8 blocks. - if (ref_frame > 0 && - vp9_is_scaled(scale_factor[ref_frame].sfc)) + if (ref_frame > 0 && vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf)) continue; if (second_ref_frame > 0 && - vp9_is_scaled(scale_factor[second_ref_frame].sfc)) + vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf)) continue; - set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor); + set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); mbmi->uv_mode = DC_PRED; // Evaluate all sub-pel filters irrespective of whether we can use // them for this frame. - mbmi->interp_filter = cm->mcomp_filter_type; - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); + mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP + : cm->interp_filter; + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (comp_pred) { if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue; - set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor); - mode_excluded = mode_excluded - ? mode_excluded - : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY; + mode_excluded = mode_excluded ? mode_excluded + : cm->reference_mode == SINGLE_REFERENCE; } else { if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) { - mode_excluded = - mode_excluded ? - mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY; + mode_excluded = mode_excluded ? + mode_excluded : cm->reference_mode == COMPOUND_REFERENCE; } } @@ -4081,7 +4010,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // unless ARNR filtering is enabled in which case we want // an unfiltered alternative. We allow near/nearest as well // because they may result in zero-zero MVs but be cheaper. - if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) + if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) continue; } @@ -4102,7 +4031,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, distortion2 += distortion_y; if (rate_uv_intra[TX_4X4] == INT_MAX) { - choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[TX_4X4], + choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4, + &rate_uv_intra[TX_4X4], &rate_uv_tokenonly[TX_4X4], &dist_uv[TX_4X4], &skip_uv[TX_4X4], &mode_uv[TX_4X4]); @@ -4139,13 +4069,21 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh; xd->mi_8x8[0]->mbmi.tx_size = TX_4X4; - cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX; - if (cm->mcomp_filter_type != BILINEAR) { + cpi->mask_filter_rd = 0; + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) + cpi->rd_filter_cache[i] = INT64_MAX; + + if (cm->interp_filter != BILINEAR) { tmp_best_filter = EIGHTTAP; if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) { tmp_best_filter = EIGHTTAP; - vp9_zero(cpi->rd_filter_cache); + } else if (cpi->sf.adaptive_pred_interp_filter == 1 && + ctx->pred_interp_filter < SWITCHABLE) { + tmp_best_filter = ctx->pred_interp_filter; + } else if (cpi->sf.adaptive_pred_interp_filter == 2) { + tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ? + ctx->pred_interp_filter : 0; } else { for (switchable_filter_index = 0; switchable_filter_index < SWITCHABLE_FILTERS; @@ -4153,8 +4091,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int newbest, rs; int64_t rs_rd; mbmi->interp_filter = switchable_filter_index; - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); - + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile, &mbmi->ref_mvs[ref_frame][0], second_ref, @@ -4167,23 +4104,25 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (tmp_rd == INT64_MAX) continue; - cpi->rd_filter_cache[switchable_filter_index] = tmp_rd; rs = get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); + cpi->rd_filter_cache[switchable_filter_index] = tmp_rd; cpi->rd_filter_cache[SWITCHABLE_FILTERS] = MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd); - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) tmp_rd += rs_rd; + cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, tmp_rd); + newbest = (tmp_rd < tmp_best_rd); if (newbest) { tmp_best_filter = mbmi->interp_filter; tmp_best_rd = tmp_rd; } - if ((newbest && cm->mcomp_filter_type == SWITCHABLE) || - (mbmi->interp_filter == cm->mcomp_filter_type && - cm->mcomp_filter_type != SWITCHABLE)) { + if ((newbest && cm->interp_filter == SWITCHABLE) || + (mbmi->interp_filter == cm->interp_filter && + cm->interp_filter != SWITCHABLE)) { tmp_best_rdu = tmp_rd; tmp_best_rate = rate; tmp_best_ratey = rate_y; @@ -4193,7 +4132,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, tmp_best_mbmode = *mbmi; for (i = 0; i < 4; i++) { tmp_best_bmodes[i] = xd->mi_8x8[0]->bmi[i]; - x->zcoeff_blk[TX_4X4][i] = !xd->plane[0].eobs[i]; + x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i]; } pred_exists = 1; if (switchable_filter_index == 0 && @@ -4212,12 +4151,12 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } } - if (tmp_best_rdu == INT64_MAX) + if (tmp_best_rdu == INT64_MAX && pred_exists) continue; - mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ? - tmp_best_filter : cm->mcomp_filter_type); - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); + mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ? + tmp_best_filter : cm->interp_filter); + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (!pred_exists) { // Handles the special case when a filter that is not in the // switchable list (bilinear, 6-tap) is indicated at the frame level @@ -4233,11 +4172,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (tmp_rd == INT64_MAX) continue; } else { - if (cpi->common.mcomp_filter_type == SWITCHABLE) { - int rs = get_switchable_rate(x); - tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0); - } - tmp_rd = tmp_best_rdu; total_sse = tmp_best_sse; rate = tmp_best_rate; rate_y = tmp_best_ratey; @@ -4251,15 +4185,13 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, rate2 += rate; distortion2 += distortion; - if (cpi->common.mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) rate2 += get_switchable_rate(x); - if (!mode_excluded) { - if (comp_pred) - mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY; - else - mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY; - } + if (!mode_excluded) + mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE + : cm->reference_mode == COMPOUND_REFERENCE; + compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred); tmp_best_rdu = best_rd - @@ -4286,9 +4218,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } } - if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { + if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost; - } // Estimate the reference frame signaling cost and add it // to the rolling cost variable. @@ -4311,14 +4242,10 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < RDCOST(x->rdmult, x->rddiv, 0, total_sse)) { // Add in the cost of the no skip flag. - int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), - 0); - rate2 += prob_skip_cost; + rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); } else { // FIXME(rbultje) make this work for splitmv also - int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), - 1); - rate2 += prob_skip_cost; + rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); distortion2 = total_sse; assert(total_sse >= 0); rate2 -= (rate_y + rate_uv); @@ -4328,9 +4255,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } } else if (mb_skip_allowed) { // Add in the cost of the no skip flag. - int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), - 0); - rate2 += prob_skip_cost; + rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); } // Calculate the final RD estimate for this mode. @@ -4338,8 +4263,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } // Keep record of best inter rd with single reference - if (xd->mi_8x8[0]->mbmi.ref_frame[0] > INTRA_FRAME && - xd->mi_8x8[0]->mbmi.ref_frame[1] == NONE && + if (is_inter_block(&xd->mi_8x8[0]->mbmi) && + !has_second_ref(&xd->mi_8x8[0]->mbmi) && !mode_excluded && this_rd < best_inter_rd) { best_inter_rd = this_rd; @@ -4347,7 +4272,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } if (!disable_skip && ref_frame == INTRA_FRAME) { - for (i = 0; i < NB_PREDICTION_TYPES; ++i) + for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = MIN(best_pred_rd[i], this_rd); for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) best_filter_rd[i] = MIN(best_filter_rd[i], this_rd); @@ -4402,9 +4327,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, /* keep record of best compound/single-only prediction */ if (!disable_skip && ref_frame != INTRA_FRAME) { - int single_rd, hybrid_rd, single_rate, hybrid_rate; + int64_t single_rd, hybrid_rd, single_rate, hybrid_rate; - if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { + if (cm->reference_mode == REFERENCE_MODE_SELECT) { single_rate = rate2 - compmode_cost; hybrid_rate = rate2; } else { @@ -4416,31 +4341,35 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); if (second_ref_frame <= INTRA_FRAME && - single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) { - best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd; + single_rd < best_pred_rd[SINGLE_REFERENCE]) { + best_pred_rd[SINGLE_REFERENCE] = single_rd; } else if (second_ref_frame > INTRA_FRAME && - single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) { - best_pred_rd[COMP_PREDICTION_ONLY] = single_rd; + single_rd < best_pred_rd[COMPOUND_REFERENCE]) { + best_pred_rd[COMPOUND_REFERENCE] = single_rd; } - if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION]) - best_pred_rd[HYBRID_PREDICTION] = hybrid_rd; + if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT]) + best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd; } /* keep record of best filter type */ if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME && - cm->mcomp_filter_type != BILINEAR) { - int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ? - SWITCHABLE_FILTERS : cm->mcomp_filter_type]; + cm->interp_filter != BILINEAR) { + int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ? + SWITCHABLE_FILTERS : cm->interp_filter]; + int64_t adj_rd; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { - int64_t adj_rd; - // In cases of poor prediction, filter_cache[] can contain really big - // values, which actually are bigger than this_rd itself. This can - // cause negative best_filter_rd[] values, which is obviously silly. - // Therefore, if filter_cache < ref, we do an adjusted calculation. - if (cpi->rd_filter_cache[i] >= ref) - adj_rd = this_rd + cpi->rd_filter_cache[i] - ref; - else // FIXME(rbultje) do this for comppred also - adj_rd = this_rd - (ref - cpi->rd_filter_cache[i]) * this_rd / ref; + if (ref == INT64_MAX) + adj_rd = 0; + else if (cpi->rd_filter_cache[i] == INT64_MAX) + // when early termination is triggered, the encoder does not have + // access to the rate-distortion cost. it only knows that the cost + // should be above the maximum valid value. hence it takes the known + // maximum plus an arbitrary constant as the rate-distortion cost. + adj_rd = cpi->mask_filter_rd - ref + 10; + else + adj_rd = cpi->rd_filter_cache[i] - ref; + + adj_rd += this_rd; best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd); } } @@ -4479,30 +4408,27 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // If we used an estimate for the uv intra rd in the loop above... if (cpi->sf.use_uv_intra_rd_estimate) { // Do Intra UV best rd mode selection if best mode choice above was intra. - if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME) { - TX_SIZE uv_tx_size = get_uv_tx_size(mbmi); + if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME) { + TX_SIZE uv_tx_size; + *mbmi = best_mbmode; + uv_tx_size = get_uv_tx_size(mbmi); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size], &rate_uv_tokenonly[uv_tx_size], &dist_uv[uv_tx_size], &skip_uv[uv_tx_size], - BLOCK_8X8); + BLOCK_8X8, uv_tx_size); } } - // If we are using reference masking and the set mask flag is set then - // create the reference frame mask. - if (cpi->sf.reference_masking && cpi->set_ref_frame_mask) - cpi->ref_frame_mask = ~(1 << vp9_ref_order[best_mode_index].ref_frame); - if (best_rd == INT64_MAX && bsize < BLOCK_8X8) { *returnrate = INT_MAX; - *returndistortion = INT_MAX; + *returndistortion = INT64_MAX; return best_rd; } - assert((cm->mcomp_filter_type == SWITCHABLE) || - (cm->mcomp_filter_type == best_mbmode.interp_filter) || - (best_mbmode.ref_frame[0] == INTRA_FRAME)); + assert((cm->interp_filter == SWITCHABLE) || + (cm->interp_filter == best_mbmode.interp_filter) || + !is_inter_block(&best_mbmode)); // Updating rd_thresh_freq_fact[] here means that the different // partition/block sizes are handled independently based on the best @@ -4511,16 +4437,13 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // combination that wins out. if (cpi->sf.adaptive_rd_thresh) { for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) { + int *const fact = &cpi->rd_thresh_freq_sub8x8[bsize][mode_index]; + if (mode_index == best_mode_index) { - cpi->rd_thresh_freq_sub8x8[bsize][mode_index] -= - (cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 3); + *fact -= (*fact >> 3); } else { - cpi->rd_thresh_freq_sub8x8[bsize][mode_index] += RD_THRESH_INC; - if (cpi->rd_thresh_freq_sub8x8[bsize][mode_index] > - (cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT)) { - cpi->rd_thresh_freq_sub8x8[bsize][mode_index] = - cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT; - } + *fact = MIN(*fact + RD_THRESH_INC, + cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); } } } @@ -4528,7 +4451,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // macroblock modes *mbmi = best_mbmode; x->skip |= best_skip2; - if (best_mbmode.ref_frame[0] == INTRA_FRAME) { + if (!is_inter_block(&best_mbmode)) { for (i = 0; i < 4; i++) xd->mi_8x8[0]->bmi[i].as_mode = best_bmodes[i].as_mode; } else { @@ -4539,7 +4462,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, mbmi->mv[1].as_int = xd->mi_8x8[0]->bmi[3].as_mv[1].as_int; } - for (i = 0; i < NB_PREDICTION_TYPES; ++i) { + for (i = 0; i < REFERENCE_MODES; ++i) { if (best_pred_rd[i] == INT64_MAX) best_pred_diff[i] = INT_MIN; else @@ -4553,7 +4476,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, else best_filter_diff[i] = best_rd - best_filter_rd[i]; } - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) assert(best_filter_diff[SWITCHABLE_FILTERS] == 0); } else { vp9_zero(best_filter_diff); @@ -4570,8 +4493,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, vp9_zero(best_tx_diff); } - set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1], - scale_factor); + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); store_coding_context(x, ctx, best_mode_index, &mbmi->ref_mvs[mbmi->ref_frame[0]][0], &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 : |