Diffstat (limited to 'libvpx/vp9/encoder')
-rw-r--r--  libvpx/vp9/encoder/vp9_bitstream.c             | 161
-rw-r--r--  libvpx/vp9/encoder/vp9_block.h                 |  49
-rw-r--r--  libvpx/vp9/encoder/vp9_encodeframe.c           | 129
-rw-r--r--  libvpx/vp9/encoder/vp9_encodemb.c              | 123
-rw-r--r--  libvpx/vp9/encoder/vp9_encodemv.c              |  13
-rw-r--r--  libvpx/vp9/encoder/vp9_firstpass.c             |  13
-rw-r--r--  libvpx/vp9/encoder/vp9_onyx_if.c               | 106
-rw-r--r--  libvpx/vp9/encoder/vp9_onyx_int.h              |   1
-rw-r--r--  libvpx/vp9/encoder/vp9_quantize.c              |  78
-rw-r--r--  libvpx/vp9/encoder/vp9_rdopt.c                 | 193
-rw-r--r--  libvpx/vp9/encoder/vp9_tokenize.c              |   6
-rw-r--r--  libvpx/vp9/encoder/x86/vp9_dct_sse2.c          |   5
-rw-r--r--  libvpx/vp9/encoder/x86/vp9_subpel_variance.asm | 198
13 files changed, 650 insertions(+), 425 deletions(-)
diff --git a/libvpx/vp9/encoder/vp9_bitstream.c b/libvpx/vp9/encoder/vp9_bitstream.c
index 87bd36c..efbadba 100644
--- a/libvpx/vp9/encoder/vp9_bitstream.c
+++ b/libvpx/vp9/encoder/vp9_bitstream.c
@@ -169,10 +169,8 @@ static void update_mode(vp9_writer *w, int n, vp9_tree tree,
const unsigned int num_events[/* n */]) {
int i = 0;
- vp9_tree_probs_from_distribution(tree, bct, num_events, 0);
- n--;
-
- for (i = 0; i < n; ++i)
+ vp9_tree_probs_from_distribution(tree, bct, num_events);
+ for (i = 0; i < n - 1; ++i)
vp9_cond_prob_diff_update(w, &Pcur[i], bct[i]);
}
@@ -191,12 +189,14 @@ static void update_mbintra_mode_probs(VP9_COMP* const cpi,
static void write_selected_tx_size(const VP9_COMP *cpi, MODE_INFO *m,
TX_SIZE tx_size, BLOCK_SIZE bsize,
vp9_writer *w) {
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
- const vp9_prob *tx_probs = get_tx_probs2(xd, &cpi->common.fc.tx_probs, m);
+ const vp9_prob *const tx_probs = get_tx_probs2(max_tx_size, xd,
+ &cpi->common.fc.tx_probs);
vp9_write(w, tx_size != TX_4X4, tx_probs[0]);
- if (bsize >= BLOCK_16X16 && tx_size != TX_4X4) {
+ if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
vp9_write(w, tx_size != TX_8X8, tx_probs[1]);
- if (bsize >= BLOCK_32X32 && tx_size != TX_8X8)
+ if (tx_size != TX_8X8 && max_tx_size >= TX_32X32)
vp9_write(w, tx_size != TX_16X16, tx_probs[2]);
}
}
@@ -231,7 +231,7 @@ static void update_switchable_interp_probs(VP9_COMP *cpi, vp9_writer *w) {
int i, j;
for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) {
vp9_tree_probs_from_distribution(vp9_switchable_interp_tree, branch_ct,
- cm->counts.switchable_interp[j], 0);
+ cm->counts.switchable_interp[j]);
for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i)
vp9_cond_prob_diff_update(w, &cm->fc.switchable_interp_prob[j][i],
@@ -250,7 +250,7 @@ static void update_inter_mode_probs(VP9_COMMON *cm, vp9_writer *w) {
for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
unsigned int branch_ct[INTER_MODES - 1][2];
vp9_tree_probs_from_distribution(vp9_inter_mode_tree, branch_ct,
- cm->counts.inter_mode[i], NEARESTMV);
+ cm->counts.inter_mode[i]);
for (j = 0; j < INTER_MODES - 1; ++j)
vp9_cond_prob_diff_update(w, &cm->fc.inter_mode_probs[i][j],
@@ -258,15 +258,15 @@ static void update_inter_mode_probs(VP9_COMMON *cm, vp9_writer *w) {
}
}
-static void pack_mb_tokens(vp9_writer* const bc,
+static void pack_mb_tokens(vp9_writer* const w,
TOKENEXTRA **tp,
const TOKENEXTRA *const stop) {
TOKENEXTRA *p = *tp;
while (p < stop && p->token != EOSB_TOKEN) {
const int t = p->token;
- const struct vp9_token *const a = vp9_coef_encodings + t;
- const vp9_extra_bit *const b = vp9_extra_bits + t;
+ const struct vp9_token *const a = &vp9_coef_encodings[t];
+ const vp9_extra_bit *const b = &vp9_extra_bits[t];
int i = 0;
const vp9_prob *pp;
int v = a->value;
@@ -289,7 +289,7 @@ static void pack_mb_tokens(vp9_writer* const bc,
do {
const int bb = (v >> --n) & 1;
- vp9_write(bc, bb, pp[i >> 1]);
+ vp9_write(w, bb, pp[i >> 1]);
i = vp9_coef_tree[i + bb];
} while (n);
@@ -304,12 +304,12 @@ static void pack_mb_tokens(vp9_writer* const bc,
do {
const int bb = (v >> --n) & 1;
- vp9_write(bc, bb, pb[i >> 1]);
+ vp9_write(w, bb, pb[i >> 1]);
i = b->tree[i + bb];
} while (n);
}
- vp9_write_bit(bc, e & 1);
+ vp9_write_bit(w, e & 1);
}
++p;
}
@@ -321,7 +321,7 @@ static void write_sb_mv_ref(vp9_writer *w, MB_PREDICTION_MODE mode,
const vp9_prob *p) {
assert(is_inter_mode(mode));
write_token(w, vp9_inter_mode_tree, p,
- &vp9_inter_mode_encodings[inter_mode_offset(mode)]);
+ &vp9_inter_mode_encodings[INTER_OFFSET(mode)]);
}
@@ -448,7 +448,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
if (bsize >= BLOCK_8X8) {
write_sb_mv_ref(bc, mode, mv_ref_p);
++cm->counts.inter_mode[mi->mode_context[rf]]
- [inter_mode_offset(mode)];
+ [INTER_OFFSET(mode)];
}
}
@@ -471,7 +471,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
const MB_PREDICTION_MODE blockmode = m->bmi[j].as_mode;
write_sb_mv_ref(bc, blockmode, mv_ref_p);
++cm->counts.inter_mode[mi->mode_context[rf]]
- [inter_mode_offset(blockmode)];
+ [INTER_OFFSET(blockmode)];
if (blockmode == NEWMV) {
#ifdef ENTROPY_STATS
@@ -545,37 +545,33 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO **mi_8x8,
}
static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
- MODE_INFO **mi_8x8, vp9_writer *bc,
- TOKENEXTRA **tok, TOKENEXTRA *tok_end,
- int mi_row, int mi_col, int index) {
+ vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end,
+ int mi_row, int mi_col) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
- MODE_INFO *m = mi_8x8[0];
-
- if (m->mbmi.sb_type < BLOCK_8X8)
- if (index > 0)
- return;
+ MODE_INFO *m;
- xd->mi_8x8 = mi_8x8;
+ xd->mi_8x8 = cm->mi_grid_visible + (mi_row * cm->mode_info_stride + mi_col);
+ m = xd->mi_8x8[0];
set_mi_row_col(xd, tile,
mi_row, num_8x8_blocks_high_lookup[m->mbmi.sb_type],
mi_col, num_8x8_blocks_wide_lookup[m->mbmi.sb_type],
cm->mi_rows, cm->mi_cols);
if (frame_is_intra_only(cm)) {
- write_mb_modes_kf(cpi, mi_8x8, bc);
+ write_mb_modes_kf(cpi, xd->mi_8x8, w);
#ifdef ENTROPY_STATS
active_section = 8;
#endif
} else {
- pack_inter_mode_mvs(cpi, m, bc);
+ pack_inter_mode_mvs(cpi, m, w);
#ifdef ENTROPY_STATS
active_section = 1;
#endif
}
assert(*tok < tok_end);
- pack_mb_tokens(bc, tok, tok_end);
+ pack_mb_tokens(w, tok, tok_end);
}
static void write_partition(VP9_COMP *cpi, int hbs, int mi_row, int mi_col,
@@ -602,59 +598,50 @@ static void write_partition(VP9_COMP *cpi, int hbs, int mi_row, int mi_col,
}
static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile,
- MODE_INFO **mi_8x8, vp9_writer *bc,
- TOKENEXTRA **tok, TOKENEXTRA *tok_end,
- int mi_row, int mi_col, BLOCK_SIZE bsize,
- int index) {
+ vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end,
+ int mi_row, int mi_col, BLOCK_SIZE bsize) {
VP9_COMMON *const cm = &cpi->common;
- const int mis = cm->mode_info_stride;
- int bsl = b_width_log2(bsize);
- int bs = (1 << bsl) / 4; // mode_info step for subsize
- int n;
- PARTITION_TYPE partition = PARTITION_NONE;
+ const int bsl = b_width_log2(bsize);
+ const int bs = (1 << bsl) / 4;
+ PARTITION_TYPE partition;
BLOCK_SIZE subsize;
- MODE_INFO *m = mi_8x8[0];
+ MODE_INFO *m = cm->mi_grid_visible[mi_row * cm->mode_info_stride + mi_col];
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
partition = partition_lookup[bsl][m->mbmi.sb_type];
-
- if (bsize < BLOCK_8X8) {
- if (index > 0)
- return;
- } else {
- write_partition(cpi, bs, mi_row, mi_col, partition, bsize, bc);
- }
-
+ write_partition(cpi, bs, mi_row, mi_col, partition, bsize, w);
subsize = get_subsize(bsize, partition);
-
- switch (partition) {
- case PARTITION_NONE:
- write_modes_b(cpi, tile, mi_8x8, bc, tok, tok_end, mi_row, mi_col, 0);
- break;
- case PARTITION_HORZ:
- write_modes_b(cpi, tile, mi_8x8, bc, tok, tok_end, mi_row, mi_col, 0);
- if ((mi_row + bs) < cm->mi_rows)
- write_modes_b(cpi, tile, mi_8x8 + bs * mis, bc, tok, tok_end,
- mi_row + bs, mi_col, 1);
- break;
- case PARTITION_VERT:
- write_modes_b(cpi, tile, mi_8x8, bc, tok, tok_end, mi_row, mi_col, 0);
- if ((mi_col + bs) < cm->mi_cols)
- write_modes_b(cpi, tile, mi_8x8 + bs, bc, tok, tok_end,
- mi_row, mi_col + bs, 1);
- break;
- case PARTITION_SPLIT:
- for (n = 0; n < 4; n++) {
- const int j = n >> 1, i = n & 1;
- write_modes_sb(cpi, tile, mi_8x8 + j * bs * mis + i * bs, bc,
- tok, tok_end,
- mi_row + j * bs, mi_col + i * bs, subsize, n);
- }
- break;
- default:
- assert(0);
+ if (subsize < BLOCK_8X8) {
+ write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+ } else {
+ switch (partition) {
+ case PARTITION_NONE:
+ write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+ break;
+ case PARTITION_HORZ:
+ write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+ if (mi_row + bs < cm->mi_rows)
+ write_modes_b(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col);
+ break;
+ case PARTITION_VERT:
+ write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+ if (mi_col + bs < cm->mi_cols)
+ write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + bs);
+ break;
+ case PARTITION_SPLIT:
+ write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, subsize);
+ write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col + bs,
+ subsize);
+ write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col,
+ subsize);
+ write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col + bs,
+ subsize);
+ break;
+ default:
+ assert(0);
+ }
}
// update partition context
@@ -665,25 +652,15 @@ static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile,
}
static void write_modes(VP9_COMP *cpi, const TileInfo *const tile,
- vp9_writer* const bc,
- TOKENEXTRA **tok, TOKENEXTRA *tok_end) {
- VP9_COMMON *const cm = &cpi->common;
- const int mis = cm->mode_info_stride;
+ vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end) {
int mi_row, mi_col;
- MODE_INFO **mi_8x8 = cm->mi_grid_visible;
- MODE_INFO **m_8x8;
-
- mi_8x8 += tile->mi_col_start + tile->mi_row_start * mis;
for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
- mi_row += 8, mi_8x8 += 8 * mis) {
- m_8x8 = mi_8x8;
- vp9_zero(cpi->left_seg_context);
+ mi_row += MI_BLOCK_SIZE) {
+ vp9_zero(cpi->left_seg_context);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
- mi_col += MI_BLOCK_SIZE, m_8x8 += MI_BLOCK_SIZE) {
- write_modes_sb(cpi, tile, m_8x8, bc, tok, tok_end, mi_row, mi_col,
- BLOCK_64X64, 0);
- }
+ mi_col += MI_BLOCK_SIZE)
+ write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, BLOCK_64X64);
}
}
@@ -703,7 +680,7 @@ static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size) {
continue;
vp9_tree_probs_from_distribution(vp9_coef_tree,
coef_branch_ct[i][j][k][l],
- coef_counts[i][j][k][l], 0);
+ coef_counts[i][j][k][l]);
coef_branch_ct[i][j][k][l][0][1] = eob_branch_ct[i][j][k][l] -
coef_branch_ct[i][j][k][l][0][0];
for (m = 0; m < UNCONSTRAINED_NODES; ++m)
@@ -1217,7 +1194,7 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
TileInfo tile;
- vp9_tile_init(&tile, cm, 0, tile_col);
+ vp9_tile_init(&tile, cm, tile_row, tile_col);
tok_end = tok[tile_row][tile_col] + cpi->tok_count[tile_row][tile_col];
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1)
diff --git a/libvpx/vp9/encoder/vp9_block.h b/libvpx/vp9/encoder/vp9_block.h
index 8033a4d..4445970 100644
--- a/libvpx/vp9/encoder/vp9_block.h
+++ b/libvpx/vp9/encoder/vp9_block.h
@@ -27,6 +27,18 @@ typedef struct {
typedef struct {
MODE_INFO mic;
uint8_t *zcoeff_blk;
+ int16_t *coeff[MAX_MB_PLANE][3];
+ int16_t *qcoeff[MAX_MB_PLANE][3];
+ int16_t *dqcoeff[MAX_MB_PLANE][3];
+ uint16_t *eobs[MAX_MB_PLANE][3];
+
+ // dual buffer pointers, 0: in use, 1: best in store
+ int16_t *coeff_pbuf[MAX_MB_PLANE][3];
+ int16_t *qcoeff_pbuf[MAX_MB_PLANE][3];
+ int16_t *dqcoeff_pbuf[MAX_MB_PLANE][3];
+ uint16_t *eobs_pbuf[MAX_MB_PLANE][3];
+
+ int is_coded;
int num_4x4_blk;
int skip;
int_mv best_ref_mv;
@@ -57,7 +69,7 @@ typedef struct {
struct macroblock_plane {
DECLARE_ALIGNED(16, int16_t, src_diff[64 * 64]);
- DECLARE_ALIGNED(16, int16_t, coeff[64 * 64]);
+ int16_t *coeff;
struct buf_2d src;
// Quantizer setings
@@ -81,6 +93,10 @@ struct macroblock {
MACROBLOCKD e_mbd;
int skip_block;
+ int select_txfm_size;
+ int skip_recode;
+ int skip_optimize;
+ int q_index;
search_site *ss;
int ss_count;
@@ -120,6 +136,11 @@ struct macroblock {
int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
+ unsigned char sb_index; // index of 32x32 block inside the 64x64 block
+ unsigned char mb_index; // index of 16x16 block inside the 32x32 block
+ unsigned char b_index; // index of 8x8 block inside the 16x16 block
+ unsigned char ab_index; // index of 4x4 block inside the 8x8 block
+
// These define limits to motion vector components to prevent them
// from extending outside the UMV borders
int mv_col_min;
@@ -179,35 +200,33 @@ struct macroblock {
// refactoring on organizing the temporary buffers, when recursive
// partition down to 4x4 block size is enabled.
static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE bsize) {
- MACROBLOCKD *const xd = &x->e_mbd;
-
switch (bsize) {
case BLOCK_64X64:
return &x->sb64_context;
case BLOCK_64X32:
- return &x->sb64x32_context[xd->sb_index];
+ return &x->sb64x32_context[x->sb_index];
case BLOCK_32X64:
- return &x->sb32x64_context[xd->sb_index];
+ return &x->sb32x64_context[x->sb_index];
case BLOCK_32X32:
- return &x->sb32_context[xd->sb_index];
+ return &x->sb32_context[x->sb_index];
case BLOCK_32X16:
- return &x->sb32x16_context[xd->sb_index][xd->mb_index];
+ return &x->sb32x16_context[x->sb_index][x->mb_index];
case BLOCK_16X32:
- return &x->sb16x32_context[xd->sb_index][xd->mb_index];
+ return &x->sb16x32_context[x->sb_index][x->mb_index];
case BLOCK_16X16:
- return &x->mb_context[xd->sb_index][xd->mb_index];
+ return &x->mb_context[x->sb_index][x->mb_index];
case BLOCK_16X8:
- return &x->sb16x8_context[xd->sb_index][xd->mb_index][xd->b_index];
+ return &x->sb16x8_context[x->sb_index][x->mb_index][x->b_index];
case BLOCK_8X16:
- return &x->sb8x16_context[xd->sb_index][xd->mb_index][xd->b_index];
+ return &x->sb8x16_context[x->sb_index][x->mb_index][x->b_index];
case BLOCK_8X8:
- return &x->sb8x8_context[xd->sb_index][xd->mb_index][xd->b_index];
+ return &x->sb8x8_context[x->sb_index][x->mb_index][x->b_index];
case BLOCK_8X4:
- return &x->sb8x4_context[xd->sb_index][xd->mb_index][xd->b_index];
+ return &x->sb8x4_context[x->sb_index][x->mb_index][x->b_index];
case BLOCK_4X8:
- return &x->sb4x8_context[xd->sb_index][xd->mb_index][xd->b_index];
+ return &x->sb4x8_context[x->sb_index][x->mb_index][x->b_index];
case BLOCK_4X4:
- return &x->ab4x4_context[xd->sb_index][xd->mb_index][xd->b_index];
+ return &x->ab4x4_context[x->sb_index][x->mb_index][x->b_index];
default:
assert(0);
return NULL;
diff --git a/libvpx/vp9/encoder/vp9_encodeframe.c b/libvpx/vp9/encoder/vp9_encodeframe.c
index a45299b..3e75f3b 100644
--- a/libvpx/vp9/encoder/vp9_encodeframe.c
+++ b/libvpx/vp9/encoder/vp9_encodeframe.c
@@ -50,25 +50,25 @@
int enc_debug = 0;
#endif
-static INLINE uint8_t *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE subsize) {
+static INLINE uint8_t *get_sb_index(MACROBLOCK *x, BLOCK_SIZE subsize) {
switch (subsize) {
case BLOCK_64X64:
case BLOCK_64X32:
case BLOCK_32X64:
case BLOCK_32X32:
- return &xd->sb_index;
+ return &x->sb_index;
case BLOCK_32X16:
case BLOCK_16X32:
case BLOCK_16X16:
- return &xd->mb_index;
+ return &x->mb_index;
case BLOCK_16X8:
case BLOCK_8X16:
case BLOCK_8X8:
- return &xd->b_index;
+ return &x->b_index;
case BLOCK_8X4:
case BLOCK_4X8:
case BLOCK_4X4:
- return &xd->ab_index;
+ return &x->ab_index;
default:
assert(0);
return NULL;
@@ -367,6 +367,8 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
+ struct macroblock_plane *const p = x->plane;
+ struct macroblockd_plane *const pd = xd->plane;
MODE_INFO *mi = &ctx->mic;
MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
MODE_INFO *mi_addr = xd->mi_8x8[0];
@@ -375,6 +377,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
const int mis = cm->mode_info_stride;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ int max_plane;
assert(mi->mbmi.mode < MB_MODE_COUNT);
assert(mi->mbmi.ref_frame[0] < MAX_REF_FRAMES);
@@ -383,6 +386,21 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
*mi_addr = *mi;
+ max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1;
+ for (i = 0; i < max_plane; ++i) {
+ p[i].coeff = ctx->coeff_pbuf[i][1];
+ pd[i].qcoeff = ctx->qcoeff_pbuf[i][1];
+ pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
+ pd[i].eobs = ctx->eobs_pbuf[i][1];
+ }
+
+ for (i = max_plane; i < MAX_MB_PLANE; ++i) {
+ p[i].coeff = ctx->coeff_pbuf[i][2];
+ pd[i].qcoeff = ctx->qcoeff_pbuf[i][2];
+ pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
+ pd[i].eobs = ctx->eobs_pbuf[i][2];
+ }
+
// Restore the coding context of the MB to that that was in place
// when the mode was picked for it
for (y = 0; y < mi_height; y++)
@@ -578,6 +596,9 @@ static void pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
+ struct macroblock_plane *const p = x->plane;
+ struct macroblockd_plane *const pd = xd->plane;
+ int i;
int orig_rdmult = x->rdmult;
double rdmult_ratio;
@@ -590,7 +611,7 @@ static void pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
if (bsize < BLOCK_8X8) {
// When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
// there is nothing to be done.
- if (xd->ab_index != 0) {
+ if (x->ab_index != 0) {
*totalrate = 0;
*totaldist = 0;
return;
@@ -600,6 +621,15 @@ static void pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
set_offsets(cpi, tile, mi_row, mi_col, bsize);
xd->mi_8x8[0]->mbmi.sb_type = bsize;
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ p[i].coeff = ctx->coeff_pbuf[i][0];
+ pd[i].qcoeff = ctx->qcoeff_pbuf[i][0];
+ pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
+ pd[i].eobs = ctx->eobs_pbuf[i][0];
+ }
+ ctx->is_coded = 0;
+ x->skip_recode = 0;
+
// Set to zero to make sure we do not use the previous encoded frame stats
xd->mi_8x8[0]->mbmi.skip_coeff = 0;
@@ -687,16 +717,15 @@ static void update_stats(VP9_COMP *cpi) {
}
static BLOCK_SIZE *get_sb_partitioning(MACROBLOCK *x, BLOCK_SIZE bsize) {
- MACROBLOCKD *const xd = &x->e_mbd;
switch (bsize) {
case BLOCK_64X64:
return &x->sb64_partitioning;
case BLOCK_32X32:
- return &x->sb_partitioning[xd->sb_index];
+ return &x->sb_partitioning[x->sb_index];
case BLOCK_16X16:
- return &x->mb_partitioning[xd->sb_index][xd->mb_index];
+ return &x->mb_partitioning[x->sb_index][x->mb_index];
case BLOCK_8X8:
- return &x->b_partitioning[xd->sb_index][xd->mb_index][xd->b_index];
+ return &x->b_partitioning[x->sb_index][x->mb_index][x->b_index];
default:
assert(0);
return NULL;
@@ -769,20 +798,19 @@ static void save_context(VP9_COMP *cpi, int mi_row, int mi_col,
static void encode_b(VP9_COMP *cpi, const TileInfo *const tile,
TOKENEXTRA **tp, int mi_row, int mi_col,
int output_enabled, BLOCK_SIZE bsize, int sub_index) {
- VP9_COMMON * const cm = &cpi->common;
- MACROBLOCK * const x = &cpi->mb;
- MACROBLOCKD * const xd = &x->e_mbd;
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->mb;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
if (sub_index != -1)
- *get_sb_index(xd, bsize) = sub_index;
+ *get_sb_index(x, bsize) = sub_index;
if (bsize < BLOCK_8X8) {
// When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
// there is nothing to be done.
- if (xd->ab_index > 0)
+ if (x->ab_index > 0)
return;
}
set_offsets(cpi, tile, mi_row, mi_col, bsize);
@@ -800,9 +828,8 @@ static void encode_b(VP9_COMP *cpi, const TileInfo *const tile,
static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile,
TOKENEXTRA **tp, int mi_row, int mi_col,
int output_enabled, BLOCK_SIZE bsize) {
- VP9_COMMON * const cm = &cpi->common;
- MACROBLOCK * const x = &cpi->mb;
- MACROBLOCKD * const xd = &x->e_mbd;
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->mb;
BLOCK_SIZE c1 = BLOCK_8X8;
const int bsl = b_width_log2(bsize), bs = (1 << bsl) / 4;
int pl = 0;
@@ -848,7 +875,7 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile,
for (i = 0; i < 4; i++) {
const int x_idx = i & 1, y_idx = i >> 1;
- *get_sb_index(xd, subsize) = i;
+ *get_sb_index(x, subsize) = i;
encode_sb(cpi, tile, tp, mi_row + y_idx * bs, mi_col + x_idx * bs,
output_enabled, subsize);
}
@@ -975,9 +1002,8 @@ static void rd_use_partition(VP9_COMP *cpi,
TOKENEXTRA **tp, int mi_row, int mi_col,
BLOCK_SIZE bsize, int *rate, int64_t *dist,
int do_recon) {
- VP9_COMMON * const cm = &cpi->common;
- MACROBLOCK * const x = &cpi->mb;
- MACROBLOCKD *xd = &cpi->mb.e_mbd;
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->mb;
const int mis = cm->mode_info_stride;
int bsl = b_width_log2(bsize);
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
@@ -1012,7 +1038,7 @@ static void rd_use_partition(VP9_COMP *cpi,
if (bsize < BLOCK_8X8) {
// When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
// there is nothing to be done.
- if (xd->ab_index != 0) {
+ if (x->ab_index != 0) {
*rate = 0;
*dist = 0;
return;
@@ -1070,7 +1096,7 @@ static void rd_use_partition(VP9_COMP *cpi,
bsize, get_block_context(x, bsize), INT64_MAX);
break;
case PARTITION_HORZ:
- *get_sb_index(xd, subsize) = 0;
+ *get_sb_index(x, subsize) = 0;
pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, &last_part_dist,
subsize, get_block_context(x, subsize), INT64_MAX);
if (last_part_rate != INT_MAX &&
@@ -1079,7 +1105,7 @@ static void rd_use_partition(VP9_COMP *cpi,
int64_t dt = 0;
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
- *get_sb_index(xd, subsize) = 1;
+ *get_sb_index(x, subsize) = 1;
pick_sb_modes(cpi, tile, mi_row + (ms >> 1), mi_col, &rt, &dt, subsize,
get_block_context(x, subsize), INT64_MAX);
if (rt == INT_MAX || dt == INT_MAX) {
@@ -1093,7 +1119,7 @@ static void rd_use_partition(VP9_COMP *cpi,
}
break;
case PARTITION_VERT:
- *get_sb_index(xd, subsize) = 0;
+ *get_sb_index(x, subsize) = 0;
pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, &last_part_dist,
subsize, get_block_context(x, subsize), INT64_MAX);
if (last_part_rate != INT_MAX &&
@@ -1102,7 +1128,7 @@ static void rd_use_partition(VP9_COMP *cpi,
int64_t dt = 0;
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
- *get_sb_index(xd, subsize) = 1;
+ *get_sb_index(x, subsize) = 1;
pick_sb_modes(cpi, tile, mi_row, mi_col + (ms >> 1), &rt, &dt, subsize,
get_block_context(x, subsize), INT64_MAX);
if (rt == INT_MAX || dt == INT_MAX) {
@@ -1128,7 +1154,7 @@ static void rd_use_partition(VP9_COMP *cpi,
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue;
- *get_sb_index(xd, subsize) = i;
+ *get_sb_index(x, subsize) = i;
rd_use_partition(cpi, tile, mi_8x8 + jj * bss * mis + ii * bss, tp,
mi_row + y_idx, mi_col + x_idx, subsize, &rt, &dt,
@@ -1169,11 +1195,10 @@ static void rd_use_partition(VP9_COMP *cpi,
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8];
- if ((mi_row + y_idx >= cm->mi_rows)
- || (mi_col + x_idx >= cm->mi_cols))
+ if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue;
- *get_sb_index(xd, split_subsize) = i;
+ *get_sb_index(x, split_subsize) = i;
*get_sb_partitioning(x, bsize) = split_subsize;
*get_sb_partitioning(x, split_subsize) = split_subsize;
@@ -1353,7 +1378,6 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
static void compute_fast_motion_search_level(VP9_COMP *cpi, BLOCK_SIZE bsize) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
// Only use 8x8 result for non HD videos.
// int use_8x8 = (MIN(cpi->common.width, cpi->common.height) < 720) ? 1 : 0;
@@ -1366,9 +1390,9 @@ static void compute_fast_motion_search_level(VP9_COMP *cpi, BLOCK_SIZE bsize) {
PICK_MODE_CONTEXT *block_context = NULL;
if (bsize == BLOCK_16X16) {
- block_context = x->sb8x8_context[xd->sb_index][xd->mb_index];
+ block_context = x->sb8x8_context[x->sb_index][x->mb_index];
} else if (bsize == BLOCK_32X32) {
- block_context = x->mb_context[xd->sb_index];
+ block_context = x->mb_context[x->sb_index];
} else if (bsize == BLOCK_64X64) {
block_context = x->sb32_context;
}
@@ -1456,9 +1480,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
TOKENEXTRA **tp, int mi_row,
int mi_col, BLOCK_SIZE bsize, int *rate,
int64_t *dist, int do_recon, int64_t best_rd) {
- VP9_COMMON * const cm = &cpi->common;
- MACROBLOCK * const x = &cpi->mb;
- MACROBLOCKD * const xd = &x->e_mbd;
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->mb;
const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8];
@@ -1484,7 +1507,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
if (bsize < BLOCK_8X8) {
// When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
// there is nothing to be done.
- if (xd->ab_index != 0) {
+ if (x->ab_index != 0) {
*rate = 0;
*dist = 0;
return;
@@ -1582,7 +1605,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
continue;
- *get_sb_index(xd, subsize) = i;
+ *get_sb_index(x, subsize) = i;
if (cpi->sf.adaptive_motion_search)
load_pred_mv(x, get_block_context(x, bsize));
rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, subsize,
@@ -1629,7 +1652,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
// PARTITION_HORZ
if (partition_horz_allowed && do_rect) {
subsize = get_subsize(bsize, PARTITION_HORZ);
- *get_sb_index(xd, subsize) = 0;
+ *get_sb_index(x, subsize) = 0;
if (cpi->sf.adaptive_motion_search)
load_pred_mv(x, get_block_context(x, bsize));
pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
@@ -1640,7 +1663,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
- *get_sb_index(xd, subsize) = 1;
+ *get_sb_index(x, subsize) = 1;
if (cpi->sf.adaptive_motion_search)
load_pred_mv(x, get_block_context(x, bsize));
pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rate,
@@ -1674,7 +1697,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
if (partition_vert_allowed && do_rect) {
subsize = get_subsize(bsize, PARTITION_VERT);
- *get_sb_index(xd, subsize) = 0;
+ *get_sb_index(x, subsize) = 0;
if (cpi->sf.adaptive_motion_search)
load_pred_mv(x, get_block_context(x, bsize));
pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
@@ -1684,7 +1707,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
- *get_sb_index(xd, subsize) = 1;
+ *get_sb_index(x, subsize) = 1;
if (cpi->sf.adaptive_motion_search)
load_pred_mv(x, get_block_context(x, bsize));
pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rate,
@@ -1765,7 +1788,7 @@ static void rd_pick_reference_frame(VP9_COMP *cpi, const TileInfo *const tile,
}
static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
- int mi_row, TOKENEXTRA **tp, int *totalrate) {
+ int mi_row, TOKENEXTRA **tp) {
VP9_COMMON * const cm = &cpi->common;
int mi_col;
@@ -1910,7 +1933,6 @@ static void encode_frame_internal(VP9_COMP *cpi) {
MACROBLOCK * const x = &cpi->mb;
VP9_COMMON * const cm = &cpi->common;
MACROBLOCKD * const xd = &x->e_mbd;
- int totalrate;
// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
// cpi->common.current_video_frame, cpi->common.show_frame,
@@ -1926,8 +1948,6 @@ static void encode_frame_internal(VP9_COMP *cpi) {
}
#endif
- totalrate = 0;
-
vp9_zero(cm->counts.switchable_interp);
vp9_zero(cpi->tx_stepdown_count);
@@ -1989,7 +2009,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vp9_tile_init(&tile, cm, tile_row, tile_col);
for (mi_row = tile.mi_row_start;
mi_row < tile.mi_row_end; mi_row += 8)
- encode_sb_row(cpi, &tile, mi_row, &tp, &totalrate);
+ encode_sb_row(cpi, &tile, mi_row, &tp);
cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
@@ -2015,10 +2035,6 @@ static void encode_frame_internal(VP9_COMP *cpi) {
cpi->sf.skip_encode_frame = 0;
}
- // 256 rate units to the bit,
- // projected_frame_size in units of BYTES
- cpi->projected_frame_size = totalrate >> 8;
-
#if 0
// Keep record of the total distortion this time around for future use
cpi->last_frame_distortion = cpi->frame_distortion;
@@ -2395,13 +2411,17 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
MODE_INFO **mi_8x8 = xd->mi_8x8;
MODE_INFO *mi = mi_8x8[0];
MB_MODE_INFO *mbmi = &mi->mbmi;
+ PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize);
unsigned int segment_id = mbmi->segment_id;
const int mis = cm->mode_info_stride;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8;
+ x->skip_optimize = ctx->is_coded;
+ ctx->is_coded = 1;
x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame &&
- xd->q_index < QIDX_SKIP_THRESH);
+ x->q_index < QIDX_SKIP_THRESH);
if (x->skip_encode)
return;
@@ -2487,7 +2507,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
(mbmi->skip_coeff ||
vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)))) {
const uint8_t context = vp9_get_pred_context_tx_size(xd);
- ++get_tx_counts(bsize, context, &cm->counts.tx)[mbmi->tx_size];
+ ++get_tx_counts(max_txsize_lookup[bsize],
+ context, &cm->counts.tx)[mbmi->tx_size];
} else {
int x, y;
TX_SIZE sz = tx_mode_to_biggest_tx_size[cm->tx_mode];
diff --git a/libvpx/vp9/encoder/vp9_encodemb.c b/libvpx/vp9/encoder/vp9_encodemb.c
index 75ed8ea..a85ddee 100644
--- a/libvpx/vp9/encoder/vp9_encodemb.c
+++ b/libvpx/vp9/encoder/vp9_encodemb.c
@@ -136,14 +136,13 @@ static void optimize_b(MACROBLOCK *mb,
const int16_t *scan, *nb;
const int mul = 1 + (tx_size == TX_32X32);
uint8_t token_cache[1024];
- const int ib = txfrm_block_to_raster_block(plane_bsize, tx_size, block);
const int16_t *dequant_ptr = pd->dequant;
const uint8_t *const band_translate = get_band_translate(tx_size);
assert((!type && !plane) || (type && plane));
dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block);
qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block);
- get_scan(xd, tx_size, type, ib, &scan, &nb);
+ get_scan(xd, tx_size, type, block, &scan, &nb);
assert(eob <= default_eob);
/* Now set up a Viterbi trellis to evaluate alternative roundings. */
@@ -179,7 +178,7 @@ static void optimize_b(MACROBLOCK *mb,
t0 = (vp9_dct_value_tokens_ptr + x)->token;
/* Consider both possible successor states. */
if (next < default_eob) {
- band = get_coef_band(band_translate, i + 1);
+ band = band_translate[i + 1];
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
rate0 +=
mb->token_costs[tx_size][type][ref][band][0][pt]
@@ -230,7 +229,7 @@ static void optimize_b(MACROBLOCK *mb,
t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token;
}
if (next < default_eob) {
- band = get_coef_band(band_translate, i + 1);
+ band = band_translate[i + 1];
if (t0 != DCT_EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
@@ -264,7 +263,7 @@ static void optimize_b(MACROBLOCK *mb,
/* There's no choice to make for a zero coefficient, so we don't
* add a new trellis node, but we do need to update the costs.
*/
- band = get_coef_band(band_translate, i + 1);
+ band = band_translate[i + 1];
t0 = tokens[next][0].token;
t1 = tokens[next][1].token;
/* Update the cost of each path if we're past the EOB token. */
@@ -284,7 +283,7 @@ static void optimize_b(MACROBLOCK *mb,
}
/* Now pick the best path through the whole trellis. */
- band = get_coef_band(band_translate, i + 1);
+ band = band_translate[i + 1];
pt = combine_entropy_contexts(*a, *l);
rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate;
@@ -420,28 +419,30 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
MACROBLOCKD *const xd = &x->e_mbd;
struct optimize_ctx *const ctx = args->ctx;
struct macroblockd_plane *const pd = &xd->plane[plane];
- const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size,
- block);
-
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block,
- pd->dst.buf, pd->dst.stride);
+ int i, j;
+ uint8_t *dst;
+ txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
+ dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
// TODO(jingning): per transformed block zero forcing only enabled for
// luma component. will integrate chroma components as well.
if (x->zcoeff_blk[tx_size][block] && plane == 0) {
- int i, j;
pd->eobs[block] = 0;
- txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
ctx->ta[plane][i] = 0;
ctx->tl[plane][j] = 0;
return;
}
- vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
+ if (!x->skip_recode)
+ vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
- if (x->optimize)
+ if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx);
+ } else {
+ ctx->ta[plane][i] = pd->eobs[block] > 0;
+ ctx->tl[plane][j] = pd->eobs[block] > 0;
+ }
if (x->skip_encode || pd->eobs[block] == 0)
return;
@@ -505,9 +506,10 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
struct optimize_ctx ctx;
struct encode_b_args arg = {x, &ctx};
- vp9_subtract_sb(x, bsize);
+ if (!x->skip_recode)
+ vp9_subtract_sb(x, bsize);
- if (x->optimize) {
+ if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
int i;
for (i = 0; i < MAX_MB_PLANE; ++i)
optimize_init_b(i, bsize, &arg);
@@ -552,19 +554,22 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
xoff = 32 * (block & twmask);
yoff = 32 * (block >> twl);
dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
- src = p->src.buf + yoff * p->src.stride + xoff;
- src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode,
dst, pd->dst.stride, dst, pd->dst.stride);
- vp9_subtract_block(32, 32, src_diff, bw * 4,
- src, p->src.stride, dst, pd->dst.stride);
- if (x->use_lp32x32fdct)
- vp9_fdct32x32_rd(src_diff, coeff, bw * 4);
- else
- vp9_fdct32x32(src_diff, coeff, bw * 4);
- vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, p->zbin_extra, eob, scan, iscan);
+
+ if (!x->skip_recode) {
+ src = p->src.buf + yoff * p->src.stride + xoff;
+ src_diff = p->src_diff + 4 * bw * yoff + xoff;
+ vp9_subtract_block(32, 32, src_diff, bw * 4,
+ src, p->src.stride, dst, pd->dst.stride);
+ if (x->use_lp32x32fdct)
+ vp9_fdct32x32_rd(src_diff, coeff, bw * 4);
+ else
+ vp9_fdct32x32(src_diff, coeff, bw * 4);
+ vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
+ p->quant, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, p->zbin_extra, eob, scan, iscan);
+ }
if (!x->skip_encode && *eob)
vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, *eob);
break;
@@ -577,16 +582,18 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
xoff = 16 * (block & twmask);
yoff = 16 * (block >> twl);
dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
- src = p->src.buf + yoff * p->src.stride + xoff;
- src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode,
dst, pd->dst.stride, dst, pd->dst.stride);
- vp9_subtract_block(16, 16, src_diff, bw * 4,
- src, p->src.stride, dst, pd->dst.stride);
- vp9_fht16x16(tx_type, src_diff, coeff, bw * 4);
- vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, p->zbin_extra, eob, scan, iscan);
+ if (!x->skip_recode) {
+ src = p->src.buf + yoff * p->src.stride + xoff;
+ src_diff = p->src_diff + 4 * bw * yoff + xoff;
+ vp9_subtract_block(16, 16, src_diff, bw * 4,
+ src, p->src.stride, dst, pd->dst.stride);
+ vp9_fht16x16(tx_type, src_diff, coeff, bw * 4);
+ vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
+ p->quant, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, p->zbin_extra, eob, scan, iscan);
+ }
if (!x->skip_encode && *eob)
vp9_iht16x16_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
break;
@@ -599,16 +606,18 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
xoff = 8 * (block & twmask);
yoff = 8 * (block >> twl);
dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
- src = p->src.buf + yoff * p->src.stride + xoff;
- src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode,
dst, pd->dst.stride, dst, pd->dst.stride);
- vp9_subtract_block(8, 8, src_diff, bw * 4,
- src, p->src.stride, dst, pd->dst.stride);
- vp9_fht8x8(tx_type, src_diff, coeff, bw * 4);
- vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
- p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, p->zbin_extra, eob, scan, iscan);
+ if (!x->skip_recode) {
+ src = p->src.buf + yoff * p->src.stride + xoff;
+ src_diff = p->src_diff + 4 * bw * yoff + xoff;
+ vp9_subtract_block(8, 8, src_diff, bw * 4,
+ src, p->src.stride, dst, pd->dst.stride);
+ vp9_fht8x8(tx_type, src_diff, coeff, bw * 4);
+ vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
+ p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, p->zbin_extra, eob, scan, iscan);
+ }
if (!x->skip_encode && *eob)
vp9_iht8x8_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
break;
@@ -624,19 +633,23 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
xoff = 4 * (block & twmask);
yoff = 4 * (block >> twl);
dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
- src = p->src.buf + yoff * p->src.stride + xoff;
- src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
dst, pd->dst.stride, dst, pd->dst.stride);
- vp9_subtract_block(4, 4, src_diff, bw * 4,
- src, p->src.stride, dst, pd->dst.stride);
- if (tx_type != DCT_DCT)
- vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type);
- else
- x->fwd_txm4x4(src_diff, coeff, bw * 4);
- vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
- p->quant_shift, qcoeff, dqcoeff,
- pd->dequant, p->zbin_extra, eob, scan, iscan);
+
+ if (!x->skip_recode) {
+ src = p->src.buf + yoff * p->src.stride + xoff;
+ src_diff = p->src_diff + 4 * bw * yoff + xoff;
+ vp9_subtract_block(4, 4, src_diff, bw * 4,
+ src, p->src.stride, dst, pd->dst.stride);
+ if (tx_type != DCT_DCT)
+ vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type);
+ else
+ x->fwd_txm4x4(src_diff, coeff, bw * 4);
+ vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
+ p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, p->zbin_extra, eob, scan, iscan);
+ }
+
if (!x->skip_encode && *eob) {
if (tx_type == DCT_DCT)
// this is like vp9_short_idct4x4 but has a special case around eob<=1
diff --git a/libvpx/vp9/encoder/vp9_encodemv.c b/libvpx/vp9/encoder/vp9_encodemv.c
index e2c6c4c..030ca64 100644
--- a/libvpx/vp9/encoder/vp9_encodemv.c
+++ b/libvpx/vp9/encoder/vp9_encodemv.c
@@ -155,9 +155,8 @@ static void counts_to_nmv_context(
unsigned int (*branch_ct_class0_hp)[2],
unsigned int (*branch_ct_hp)[2]) {
int i, j, k;
- vp9_tree_probs_from_distribution(vp9_mv_joint_tree,
- branch_ct_joint,
- nmv_count->joints, 0);
+ vp9_tree_probs_from_distribution(vp9_mv_joint_tree, branch_ct_joint,
+ nmv_count->joints);
for (i = 0; i < 2; ++i) {
const uint32_t s0 = nmv_count->comps[i].sign[0];
const uint32_t s1 = nmv_count->comps[i].sign[1];
@@ -166,10 +165,10 @@ static void counts_to_nmv_context(
branch_ct_sign[i][1] = s1;
vp9_tree_probs_from_distribution(vp9_mv_class_tree,
branch_ct_classes[i],
- nmv_count->comps[i].classes, 0);
+ nmv_count->comps[i].classes);
vp9_tree_probs_from_distribution(vp9_mv_class0_tree,
branch_ct_class0[i],
- nmv_count->comps[i].class0, 0);
+ nmv_count->comps[i].class0);
for (j = 0; j < MV_OFFSET_BITS; ++j) {
const uint32_t b0 = nmv_count->comps[i].bits[j][0];
const uint32_t b1 = nmv_count->comps[i].bits[j][1];
@@ -182,11 +181,11 @@ static void counts_to_nmv_context(
for (k = 0; k < CLASS0_SIZE; ++k) {
vp9_tree_probs_from_distribution(vp9_mv_fp_tree,
branch_ct_class0_fp[i][k],
- nmv_count->comps[i].class0_fp[k], 0);
+ nmv_count->comps[i].class0_fp[k]);
}
vp9_tree_probs_from_distribution(vp9_mv_fp_tree,
branch_ct_fp[i],
- nmv_count->comps[i].fp, 0);
+ nmv_count->comps[i].fp);
}
if (usehp) {
for (i = 0; i < 2; ++i) {
diff --git a/libvpx/vp9/encoder/vp9_firstpass.c b/libvpx/vp9/encoder/vp9_firstpass.c
index 6a3555d..974c300 100644
--- a/libvpx/vp9/encoder/vp9_firstpass.c
+++ b/libvpx/vp9/encoder/vp9_firstpass.c
@@ -482,6 +482,10 @@ void vp9_first_pass(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
TileInfo tile;
+ struct macroblock_plane *const p = x->plane;
+ struct macroblockd_plane *const pd = xd->plane;
+ PICK_MODE_CONTEXT *ctx = &x->sb64_context;
+ int i;
int recon_yoffset, recon_uvoffset;
const int lst_yv12_idx = cm->ref_frame_map[cpi->lst_fb_idx];
@@ -525,6 +529,15 @@ void vp9_first_pass(VP9_COMP *cpi) {
vp9_frame_init_quantizer(cpi);
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ p[i].coeff = ctx->coeff_pbuf[i][1];
+ pd[i].qcoeff = ctx->qcoeff_pbuf[i][1];
+ pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
+ pd[i].eobs = ctx->eobs_pbuf[i][1];
+ }
+ x->skip_recode = 0;
+
+
// Initialise the MV cost table to the defaults
// if( cm->current_video_frame == 0)
// if ( 0 )
diff --git a/libvpx/vp9/encoder/vp9_onyx_if.c b/libvpx/vp9/encoder/vp9_onyx_if.c
index f922f90..dd4705d 100644
--- a/libvpx/vp9/encoder/vp9_onyx_if.c
+++ b/libvpx/vp9/encoder/vp9_onyx_if.c
@@ -834,6 +834,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->adaptive_rd_thresh = 2;
sf->recode_loop = 2;
+ sf->use_lp32x32fdct = 1;
sf->mode_skip_start = 11;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
@@ -1436,90 +1437,121 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
} while (++i <= MV_MAX);
}
+static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
+ PICK_MODE_CONTEXT *ctx) {
+ int num_pix = num_4x4_blk << 4;
+ int i, k;
+ ctx->num_4x4_blk = num_4x4_blk;
+ CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
+ vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ for (k = 0; k < 3; ++k) {
+ CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
+ vpx_memalign(16, num_pix * sizeof(int16_t)));
+ CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
+ vpx_memalign(16, num_pix * sizeof(int16_t)));
+ CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
+ vpx_memalign(16, num_pix * sizeof(int16_t)));
+ CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
+ vpx_memalign(16, num_pix * sizeof(uint16_t)));
+ ctx->coeff_pbuf[i][k] = ctx->coeff[i][k];
+ ctx->qcoeff_pbuf[i][k] = ctx->qcoeff[i][k];
+ ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
+ ctx->eobs_pbuf[i][k] = ctx->eobs[i][k];
+ }
+ }
+}
+
+static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
+ int i, k;
+ vpx_free(ctx->zcoeff_blk);
+ ctx->zcoeff_blk = 0;
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ for (k = 0; k < 3; ++k) {
+ vpx_free(ctx->coeff[i][k]);
+ ctx->coeff[i][k] = 0;
+ vpx_free(ctx->qcoeff[i][k]);
+ ctx->qcoeff[i][k] = 0;
+ vpx_free(ctx->dqcoeff[i][k]);
+ ctx->dqcoeff[i][k] = 0;
+ vpx_free(ctx->eobs[i][k]);
+ ctx->eobs[i][k] = 0;
+ }
+ }
+}
+
static void init_pick_mode_context(VP9_COMP *cpi) {
int i;
- MACROBLOCK *x = &cpi->mb;
- MACROBLOCKD *xd = &x->e_mbd;
- VP9_COMMON *cm = &cpi->common;
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->mb;
+
for (i = 0; i < BLOCK_SIZES; ++i) {
const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
const int num_4x4_h = num_4x4_blocks_high_lookup[i];
const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);
if (i < BLOCK_16X16) {
- for (xd->sb_index = 0; xd->sb_index < 4; ++xd->sb_index) {
- for (xd->mb_index = 0; xd->mb_index < 4; ++xd->mb_index) {
- for (xd->b_index = 0; xd->b_index < 16 / num_4x4_blk; ++xd->b_index) {
+ for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) {
+ for (x->mb_index = 0; x->mb_index < 4; ++x->mb_index) {
+ for (x->b_index = 0; x->b_index < 16 / num_4x4_blk; ++x->b_index) {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
- ctx->num_4x4_blk = num_4x4_blk;
- CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
- vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
+ alloc_mode_context(cm, num_4x4_blk, ctx);
}
}
}
} else if (i < BLOCK_32X32) {
- for (xd->sb_index = 0; xd->sb_index < 4; ++xd->sb_index) {
- for (xd->mb_index = 0; xd->mb_index < 64 / num_4x4_blk;
- ++xd->mb_index) {
+ for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) {
+ for (x->mb_index = 0; x->mb_index < 64 / num_4x4_blk; ++x->mb_index) {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
ctx->num_4x4_blk = num_4x4_blk;
- CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
- vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
+ alloc_mode_context(cm, num_4x4_blk, ctx);
}
}
} else if (i < BLOCK_64X64) {
- for (xd->sb_index = 0; xd->sb_index < 256 / num_4x4_blk; ++xd->sb_index) {
+ for (x->sb_index = 0; x->sb_index < 256 / num_4x4_blk; ++x->sb_index) {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
ctx->num_4x4_blk = num_4x4_blk;
- CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
- vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
+ alloc_mode_context(cm, num_4x4_blk, ctx);
}
} else {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
ctx->num_4x4_blk = num_4x4_blk;
- CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
- vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
+ alloc_mode_context(cm, num_4x4_blk, ctx);
}
}
}
static void free_pick_mode_context(MACROBLOCK *x) {
int i;
- MACROBLOCKD *xd = &x->e_mbd;
for (i = 0; i < BLOCK_SIZES; ++i) {
const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
const int num_4x4_h = num_4x4_blocks_high_lookup[i];
const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);
if (i < BLOCK_16X16) {
- for (xd->sb_index = 0; xd->sb_index < 4; ++xd->sb_index) {
- for (xd->mb_index = 0; xd->mb_index < 4; ++xd->mb_index) {
- for (xd->b_index = 0; xd->b_index < 16 / num_4x4_blk; ++xd->b_index) {
+ for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) {
+ for (x->mb_index = 0; x->mb_index < 4; ++x->mb_index) {
+ for (x->b_index = 0; x->b_index < 16 / num_4x4_blk; ++x->b_index) {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
- vpx_free(ctx->zcoeff_blk);
- ctx->zcoeff_blk = 0;
+ free_mode_context(ctx);
}
}
}
} else if (i < BLOCK_32X32) {
- for (xd->sb_index = 0; xd->sb_index < 4; ++xd->sb_index) {
- for (xd->mb_index = 0; xd->mb_index < 64 / num_4x4_blk;
- ++xd->mb_index) {
+ for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) {
+ for (x->mb_index = 0; x->mb_index < 64 / num_4x4_blk; ++x->mb_index) {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
- vpx_free(ctx->zcoeff_blk);
- ctx->zcoeff_blk = 0;
+ free_mode_context(ctx);
}
}
} else if (i < BLOCK_64X64) {
- for (xd->sb_index = 0; xd->sb_index < 256 / num_4x4_blk; ++xd->sb_index) {
+ for (x->sb_index = 0; x->sb_index < 256 / num_4x4_blk; ++x->sb_index) {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
- vpx_free(ctx->zcoeff_blk);
- ctx->zcoeff_blk = 0;
+ free_mode_context(ctx);
}
} else {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
- vpx_free(ctx->zcoeff_blk);
- ctx->zcoeff_blk = 0;
+ free_mode_context(ctx);
}
}
}
@@ -3404,7 +3436,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// Post encode loop adjustment of Q prediction.
if (!active_worst_qchanged)
- vp9_update_rate_correction_factors(cpi, (cpi->sf.recode_loop) ? 2 : 0);
+ vp9_update_rate_correction_factors(cpi, (cpi->sf.recode_loop ||
+ cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0);
+
cpi->last_q[cm->frame_type] = cm->base_qindex;
diff --git a/libvpx/vp9/encoder/vp9_onyx_int.h b/libvpx/vp9/encoder/vp9_onyx_int.h
index 9429c7f..9e80212 100644
--- a/libvpx/vp9/encoder/vp9_onyx_int.h
+++ b/libvpx/vp9/encoder/vp9_onyx_int.h
@@ -312,7 +312,6 @@ typedef struct VP9_COMP {
VP9_COMMON common;
VP9_CONFIG oxcf;
struct rdcost_block_args rdcost_stack;
-
struct lookahead_ctx *lookahead;
struct lookahead_entry *source;
#if CONFIG_MULTIPLE_ARF
diff --git a/libvpx/vp9/encoder/vp9_quantize.c b/libvpx/vp9/encoder/vp9_quantize.c
index fca7525..d24be96 100644
--- a/libvpx/vp9/encoder/vp9_quantize.c
+++ b/libvpx/vp9/encoder/vp9_quantize.c
@@ -22,7 +22,7 @@
extern int enc_debug;
#endif
-void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
+void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t count,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
@@ -30,58 +30,44 @@ void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *dequant_ptr,
int zbin_oq_value, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
- int i, rc, eob;
- int zbins[2], nzbins[2], zbin;
- int x, y, z, sz;
- int zero_flag = n_coeffs;
+ int i, non_zero_count = count, eob = -1;
+ const int zbins[2] = { zbin_ptr[0] + zbin_oq_value,
+ zbin_ptr[1] + zbin_oq_value };
+ const int nzbins[2] = { zbins[0] * -1,
+ zbins[1] * -1 };
- vpx_memset(qcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
- vpx_memset(dqcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
-
- eob = -1;
-
- // Base ZBIN
- zbins[0] = zbin_ptr[0] + zbin_oq_value;
- zbins[1] = zbin_ptr[1] + zbin_oq_value;
- nzbins[0] = zbins[0] * -1;
- nzbins[1] = zbins[1] * -1;
+ vpx_memset(qcoeff_ptr, 0, count * sizeof(int16_t));
+ vpx_memset(dqcoeff_ptr, 0, count * sizeof(int16_t));
if (!skip_block) {
// Pre-scan pass
- for (i = n_coeffs - 1; i >= 0; i--) {
- rc = scan[i];
- z = coeff_ptr[rc];
+ for (i = count - 1; i >= 0; i--) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
- if (z < zbins[rc != 0] && z > nzbins[rc != 0]) {
- zero_flag--;
- } else {
+ if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0])
+ non_zero_count--;
+ else
break;
- }
}
// Quantization pass: All coefficients with index >= zero_flag are
// skippable. Note: zero_flag can be zero.
- for (i = 0; i < zero_flag; i++) {
- rc = scan[i];
- z = coeff_ptr[rc];
-
- zbin = (zbins[rc != 0]);
-
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz;
-
- if (x >= zbin) {
- x += (round_ptr[rc != 0]);
- x = clamp(x, INT16_MIN, INT16_MAX);
- y = (((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x)) *
- quant_shift_ptr[rc != 0]) >> 16; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0]; // dequantized value
-
- if (y) {
- eob = i; // last nonzero coeffs
- }
+ for (i = 0; i < non_zero_count; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+
+ if (abs_coeff >= zbins[rc != 0]) {
+ int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+ tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
+ quant_shift_ptr[rc != 0]) >> 16; // quantization
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
+
+ if (tmp)
+ eob = i;
}
}
}
@@ -315,17 +301,17 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
SEG_LVL_SKIP);
/* save this macroblock QIndex for vp9_update_zbin_extra() */
- x->e_mbd.q_index = qindex;
+ x->q_index = qindex;
/* R/D setup */
cpi->mb.errorperbit = rdmult >> 6;
cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
- vp9_initialize_me_consts(cpi, xd->q_index);
+ vp9_initialize_me_consts(cpi, x->q_index);
}
void vp9_update_zbin_extra(VP9_COMP *cpi, MACROBLOCK *x) {
- const int qindex = x->e_mbd.q_index;
+ const int qindex = x->q_index;
const int y_zbin_extra = (cpi->common.y_dequant[qindex][1] *
(cpi->zbin_mode_boost + x->act_zbin_adj)) >> 7;
const int uv_zbin_extra = (cpi->common.uv_dequant[qindex][1] *
diff --git a/libvpx/vp9/encoder/vp9_rdopt.c b/libvpx/vp9/encoder/vp9_rdopt.c
index 993919e..78cb06b 100644
--- a/libvpx/vp9/encoder/vp9_rdopt.c
+++ b/libvpx/vp9/encoder/vp9_rdopt.c
@@ -246,6 +246,10 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
vp9_set_speed_features(cpi);
+ cpi->mb.select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
+ cm->frame_type != KEY_FRAME) ?
+ 0 : 1;
+
set_block_thresholds(cpi);
fill_token_costs(cpi->mb.token_costs, cm->fc.coef_probs);
@@ -268,10 +272,10 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
MB_PREDICTION_MODE m;
for (m = NEARESTMV; m < MB_MODE_COUNT; m++)
- cpi->mb.inter_mode_cost[i][inter_mode_offset(m)] =
+ cpi->mb.inter_mode_cost[i][INTER_OFFSET(m)] =
cost_token(vp9_inter_mode_tree,
cm->fc.inter_mode_probs[i],
- &vp9_inter_mode_encodings[inter_mode_offset(m)]);
+ &vp9_inter_mode_encodings[INTER_OFFSET(m)]);
}
}
}
@@ -609,7 +613,7 @@ static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
// TODO(jingning): temporarily enabled only for luma component
rd = MIN(rd1, rd2);
- if (plane == 0)
+ if (!xd->lossless && plane == 0)
x->zcoeff_blk[tx_size][block] = rd1 > rd2 || !xd->plane[plane].eobs[block];
args->this_rate += args->rate;
@@ -740,7 +744,7 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
int n, m;
int s0, s1;
- const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs, xd->mi_8x8[0]);
+ const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
for (n = TX_4X4; n <= max_tx_size; n++) {
r[n][1] = r[n][0];
@@ -845,7 +849,7 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
// double scale_r[TX_SIZES] = {2.82, 2.00, 1.41, 1.00};
- const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs, xd->mi_8x8[0]);
+ const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
// for (n = TX_4X4; n <= max_txfm_size; n++)
// r[n][0] = (r[n][0] * scale_r[n]);
@@ -1326,6 +1330,7 @@ static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x,
}
static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
+ PICK_MODE_CONTEXT *ctx,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize) {
@@ -1361,6 +1366,27 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
*rate_tokenonly = this_rate_tokenonly;
*distortion = this_distortion;
*skippable = s;
+ if (!x->select_txfm_size) {
+ int i;
+ struct macroblock_plane *const p = x->plane;
+ struct macroblockd_plane *const pd = x->e_mbd.plane;
+ for (i = 1; i < MAX_MB_PLANE; ++i) {
+ p[i].coeff = ctx->coeff_pbuf[i][2];
+ pd[i].qcoeff = ctx->qcoeff_pbuf[i][2];
+ pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
+ pd[i].eobs = ctx->eobs_pbuf[i][2];
+
+ ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0];
+ ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0];
+ ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0];
+ ctx->eobs_pbuf[i][2] = ctx->eobs_pbuf[i][0];
+
+ ctx->coeff_pbuf[i][0] = p[i].coeff;
+ ctx->qcoeff_pbuf[i][0] = pd[i].qcoeff;
+ ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
+ ctx->eobs_pbuf[i][0] = pd[i].eobs;
+ }
+ }
}
}
@@ -1386,8 +1412,9 @@ static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
return this_rd;
}
-static void choose_intra_uv_mode(VP9_COMP *cpi, BLOCK_SIZE bsize,
- int *rate_uv, int *rate_uv_tokenonly,
+static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
+ BLOCK_SIZE bsize, int *rate_uv,
+ int *rate_uv_tokenonly,
int64_t *dist_uv, int *skip_uv,
MB_PREDICTION_MODE *mode_uv) {
MACROBLOCK *const x = &cpi->mb;
@@ -1400,7 +1427,7 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, BLOCK_SIZE bsize,
// Else do a proper rd search for each possible transform size that may
// be considered in the main rd loop.
} else {
- rd_pick_intra_sbuv_mode(cpi, x,
+ rd_pick_intra_sbuv_mode(cpi, x, ctx,
rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
}
@@ -1416,7 +1443,7 @@ static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
// Don't account for mode here if segment skip is enabled.
if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
assert(is_inter_mode(mode));
- return x->inter_mode_cost[mode_context][inter_mode_offset(mode)];
+ return x->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
} else {
return 0;
}
@@ -1707,7 +1734,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
const struct buf_2d orig_src = x->plane[0].src;
struct buf_2d orig_pre[2];
- mode_idx = inter_mode_offset(this_mode);
+ mode_idx = INTER_OFFSET(this_mode);
bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
// if we're near/nearest and mv == 0,0, compare to zeromv
@@ -1901,6 +1928,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
x->mvcost, cpi);
+
bsi->rdstat[i][mode_idx].mvs[0].as_int = mode_mv[this_mode].as_int;
if (num_4x4_blocks_wide > 1)
bsi->rdstat[i + 1][mode_idx].mvs[0].as_int =
@@ -2002,7 +2030,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
return;
}
- mode_idx = inter_mode_offset(mode_selected);
+ mode_idx = INTER_OFFSET(mode_selected);
vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
@@ -2078,7 +2106,7 @@ static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
return INT64_MAX;
/* set it to the best */
for (i = 0; i < 4; i++) {
- mode_idx = inter_mode_offset(bsi->modes[i]);
+ mode_idx = INTER_OFFSET(bsi->modes[i]);
mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
if (has_second_ref(mbmi))
mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
@@ -2477,54 +2505,41 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize);
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
- int refs[2] = { mbmi->ref_frame[0],
- (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
+ const int refs[2] = { mbmi->ref_frame[0],
+ mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
int_mv ref_mv[2];
const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
- int ite;
+ int ite, ref;
// Prediction buffer from second frame.
uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
// Do joint motion search in compound mode to get more accurate mv.
- struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
- struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}};
- struct buf_2d scaled_first_yv12;
+ struct buf_2d backup_yv12[2][MAX_MB_PLANE];
+ struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
int last_besterr[2] = {INT_MAX, INT_MAX};
- YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL};
- scaled_ref_frame[0] = get_scaled_ref_frame(cpi, mbmi->ref_frame[0]);
- scaled_ref_frame[1] = get_scaled_ref_frame(cpi, mbmi->ref_frame[1]);
-
- ref_mv[0] = mbmi->ref_mvs[refs[0]][0];
- ref_mv[1] = mbmi->ref_mvs[refs[1]][0];
-
- if (scaled_ref_frame[0]) {
- int i;
- // Swap out the reference frame for a version that's been scaled to
- // match the resolution of the current frame, allowing the existing
- // motion search code to be used without additional modifications.
- for (i = 0; i < MAX_MB_PLANE; i++)
- backup_yv12[i] = xd->plane[i].pre[0];
- setup_pre_planes(xd, 0, scaled_ref_frame[0], mi_row, mi_col, NULL);
- }
+ YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
+ get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
+ get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
+ };
- if (scaled_ref_frame[1]) {
- int i;
- for (i = 0; i < MAX_MB_PLANE; i++)
- backup_second_yv12[i] = xd->plane[i].pre[1];
+ for (ref = 0; ref < 2; ++ref) {
+ ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
+
+ if (scaled_ref_frame[ref]) {
+ int i;
+ // Swap out the reference frame for a version that's been scaled to
+ // match the resolution of the current frame, allowing the existing
+ // motion search code to be used without additional modifications.
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ backup_yv12[ref][i] = xd->plane[i].pre[ref];
+ setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, NULL);
+ }
- setup_pre_planes(xd, 1, scaled_ref_frame[1], mi_row, mi_col, NULL);
+ xd->scale_factor[ref].sfc->set_scaled_offsets(&xd->scale_factor[ref],
+ mi_row, mi_col);
+ frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
}
- xd->scale_factor[0].sfc->set_scaled_offsets(&xd->scale_factor[0],
- mi_row, mi_col);
- xd->scale_factor[1].sfc->set_scaled_offsets(&xd->scale_factor[1],
- mi_row, mi_col);
- scaled_first_yv12 = xd->plane[0].pre[0];
-
- // Initialize mv using single prediction mode result.
- frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
- frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
-
// Allow joint search multiple times iteratively for each ref frame
// and break out the search loop if it couldn't find better mv.
for (ite = 0; ite < 4; ite++) {
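
The comment in this hunk describes a save/point/restore pattern around the motion search: back up the per-plane prediction pointers, point them at a scaled reference, run the unmodified search, then restore. A hedged sketch of that pattern in isolation, with placeholder types and a stubbed run_search() instead of the real libvpx structures and search code:

#include <string.h>

#define MAX_PLANES 3

struct plane_buf { const unsigned char *buf; int stride; };

static void run_search(struct plane_buf pre[MAX_PLANES]) {
  (void)pre;  /* stand-in for the existing motion search code */
}

static void search_with_scaled_ref(struct plane_buf pre[MAX_PLANES],
                                   const struct plane_buf scaled[MAX_PLANES],
                                   int have_scaled) {
  struct plane_buf backup[MAX_PLANES];

  if (have_scaled) {
    /* Swap in the scaled reference so the search code needs no changes. */
    memcpy(backup, pre, sizeof(backup));
    memcpy(pre, scaled, MAX_PLANES * sizeof(*pre));
  }

  run_search(pre);

  if (have_scaled)
    memcpy(pre, backup, sizeof(backup));  /* restore the predictor */
}

int main(void) {
  static const unsigned char src[16], scaled_src[64];
  struct plane_buf pre[MAX_PLANES] = { { src, 4 }, { src, 4 }, { src, 4 } };
  const struct plane_buf scaled[MAX_PLANES] =
      { { scaled_src, 8 }, { scaled_src, 8 }, { scaled_src, 8 } };

  search_with_scaled_ref(pre, scaled, 1);  /* pre[] is unchanged on return */
  return 0;
}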
@@ -2604,24 +2619,20 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
}
}
- // restore the predictor
- if (scaled_ref_frame[0]) {
- int i;
- for (i = 0; i < MAX_MB_PLANE; i++)
- xd->plane[i].pre[0] = backup_yv12[i];
- }
+ *rate_mv = 0;
- if (scaled_ref_frame[1]) {
- int i;
- for (i = 0; i < MAX_MB_PLANE; i++)
- xd->plane[i].pre[1] = backup_second_yv12[i];
+ for (ref = 0; ref < 2; ++ref) {
+ if (scaled_ref_frame[ref]) {
+ // restore the predictor
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[ref] = backup_yv12[ref][i];
+ }
+
+ *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
+ &mbmi->ref_mvs[refs[ref]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
}
- *rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
- &mbmi->ref_mvs[refs[0]][0].as_mv,
- x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
- *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
- &mbmi->ref_mvs[refs[1]][0].as_mv,
- x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
vpx_free(second_pred);
}
@@ -3046,6 +3057,30 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
return this_rd; // if 0, this will be re-calculated by caller
}
+static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
+ int max_plane) {
+ struct macroblock_plane *const p = x->plane;
+ struct macroblockd_plane *const pd = x->e_mbd.plane;
+ int i;
+
+ for (i = 0; i < max_plane; ++i) {
+ p[i].coeff = ctx->coeff_pbuf[i][1];
+ pd[i].qcoeff = ctx->qcoeff_pbuf[i][1];
+ pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
+ pd[i].eobs = ctx->eobs_pbuf[i][1];
+
+ ctx->coeff_pbuf[i][1] = ctx->coeff_pbuf[i][0];
+ ctx->qcoeff_pbuf[i][1] = ctx->qcoeff_pbuf[i][0];
+ ctx->dqcoeff_pbuf[i][1] = ctx->dqcoeff_pbuf[i][0];
+ ctx->eobs_pbuf[i][1] = ctx->eobs_pbuf[i][0];
+
+ ctx->coeff_pbuf[i][0] = p[i].coeff;
+ ctx->qcoeff_pbuf[i][0] = pd[i].qcoeff;
+ ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
+ ctx->eobs_pbuf[i][0] = pd[i].eobs;
+ }
+}
+
void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int *returnrate, int64_t *returndist,
BLOCK_SIZE bsize,
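
swap_block_ptr() above preserves the best mode's coefficient data by exchanging pointers rather than copying arrays. A simplified, self-contained sketch of that ping-pong, using made-up struct names in place of macroblock_plane and PICK_MODE_CONTEXT:

#include <stdio.h>

struct plane    { int *coeff; };        /* working coefficient buffer      */
struct mode_ctx { int *coeff_saved; };  /* slot that keeps the best mode   */

static void swap_coeff_ptr(struct plane *p, struct mode_ctx *ctx) {
  int *const tmp = ctx->coeff_saved;  /* buffer holding the previous best  */
  ctx->coeff_saved = p->coeff;        /* context now owns the new best data */
  p->coeff = tmp;                     /* encoder keeps working in the other */
}

int main(void) {
  int a[4] = { 1, 2, 3, 4 }, b[4] = { 0 };
  struct plane p = { a };
  struct mode_ctx ctx = { b };

  swap_coeff_ptr(&p, &ctx);  /* a[] is now preserved, b[] becomes scratch */
  printf("saved[0]=%d scratch[0]=%d\n", ctx.coeff_saved[0], p.coeff[0]);
  return 0;
}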
@@ -3065,7 +3100,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
*returnrate = INT_MAX;
return;
}
- rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
+ rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
&dist_uv, &uv_skip, bsize);
} else {
y_skip = 0;
@@ -3074,7 +3109,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
*returnrate = INT_MAX;
return;
}
- rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
+ rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
&dist_uv, &uv_skip, BLOCK_8X8);
}
@@ -3157,7 +3192,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
int best_skip2 = 0;
- x->skip_encode = cpi->sf.skip_encode_frame && xd->q_index < QIDX_SKIP_THRESH;
+ x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
// Everywhere the flag is set the error is much higher than its neighbors.
ctx->frames_with_high_error = 0;
@@ -3196,8 +3231,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
case BLOCK_32X32:
for (i = 0; i < 4; i++) {
ref_frame_mask |=
- x->mb_context[xd->sb_index][i].frames_with_high_error;
- mode_mask |= x->mb_context[xd->sb_index][i].modes_with_high_error;
+ x->mb_context[x->sb_index][i].frames_with_high_error;
+ mode_mask |= x->mb_context[x->sb_index][i].modes_with_high_error;
}
break;
default:
@@ -3440,7 +3475,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
uv_tx = MIN(mbmi->tx_size, max_uv_txsize_lookup[bsize]);
if (rate_uv_intra[uv_tx] == INT_MAX) {
- choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[uv_tx],
+ choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[uv_tx],
&rate_uv_tokenonly[uv_tx],
&dist_uv[uv_tx], &skip_uv[uv_tx],
&mode_uv[uv_tx]);
@@ -3574,6 +3609,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// Did this mode help.. i.e. is it the new best mode
if (this_rd < best_rd || x->skip) {
+ int max_plane = MAX_MB_PLANE;
if (!mode_excluded) {
// Note index of best mode so far
best_mode_index = mode_index;
@@ -3581,6 +3617,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (ref_frame == INTRA_FRAME) {
/* required for left and above block mv */
mbmi->mv[0].as_int = 0;
+ max_plane = 1;
}
*returnrate = rate2;
@@ -3588,6 +3625,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
best_rd = this_rd;
best_mbmode = *mbmi;
best_skip2 = this_skip2;
+ if (!x->select_txfm_size)
+ swap_block_ptr(x, ctx, max_plane);
vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
sizeof(uint8_t) * ctx->num_4x4_blk);
@@ -3694,7 +3733,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// Do Intra UV best rd mode selection if best mode choice above was intra.
if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) {
TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
- rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[uv_tx_size],
+ rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
&rate_uv_tokenonly[uv_tx_size],
&dist_uv[uv_tx_size],
&skip_uv[uv_tx_size],
@@ -3850,7 +3889,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
b_mode_info best_bmodes[4];
int best_skip2 = 0;
- x->skip_encode = cpi->sf.skip_encode_frame && xd->q_index < QIDX_SKIP_THRESH;
+ x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
for (i = 0; i < 4; i++) {
@@ -4063,7 +4102,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
distortion2 += distortion_y;
if (rate_uv_intra[TX_4X4] == INT_MAX) {
- choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[TX_4X4],
+ choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[TX_4X4],
&rate_uv_tokenonly[TX_4X4],
&dist_uv[TX_4X4], &skip_uv[TX_4X4],
&mode_uv[TX_4X4]);
@@ -4317,12 +4356,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
// Did this mode help.. i.e. is it the new best mode
if (this_rd < best_rd || x->skip) {
if (!mode_excluded) {
+ int max_plane = MAX_MB_PLANE;
// Note index of best mode so far
best_mode_index = mode_index;
if (ref_frame == INTRA_FRAME) {
/* required for left and above block mv */
mbmi->mv[0].as_int = 0;
+ max_plane = 1;
}
*returnrate = rate2;
@@ -4332,6 +4373,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
best_mbmode = *mbmi;
best_skip2 = this_skip2;
+ if (!x->select_txfm_size)
+ swap_block_ptr(x, ctx, max_plane);
vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
sizeof(uint8_t) * ctx->num_4x4_blk);
@@ -4438,7 +4481,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
// Do Intra UV best rd mode selection if best mode choice above was intra.
if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME) {
TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
- rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[uv_tx_size],
+ rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
&rate_uv_tokenonly[uv_tx_size],
&dist_uv[uv_tx_size],
&skip_uv[uv_tx_size],
diff --git a/libvpx/vp9/encoder/vp9_tokenize.c b/libvpx/vp9/encoder/vp9_tokenize.c
index 7d4676e..c7336d0 100644
--- a/libvpx/vp9/encoder/vp9_tokenize.c
+++ b/libvpx/vp9/encoder/vp9_tokenize.c
@@ -57,7 +57,7 @@ static void fill_value_tokens() {
// initialize the cost for extra bits for all possible coefficient value.
{
int cost = 0;
- const vp9_extra_bit *p = vp9_extra_bits + t[i].token;
+ const vp9_extra_bit *p = &vp9_extra_bits[t[i].token];
if (p->base_val) {
const int extra = t[i].extra;
@@ -73,7 +73,7 @@ static void fill_value_tokens() {
} while (++i < DCT_MAX_VALUE);
vp9_dct_value_tokens_ptr = dct_value_tokens + DCT_MAX_VALUE;
- vp9_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE;
+ vp9_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE;
}
struct tokenize_b_args {
@@ -127,7 +127,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
get_scan(xd, tx_size, type, block, &scan, &nb);
c = 0;
do {
- const int band = get_coef_band(band_translate, c);
+ const int band = band_translate[c];
int token;
int v = 0;
rc = scan[c];
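
The get_coef_band() call is replaced by a direct band_translate[c] lookup. A small illustrative program of that scan-position-to-band mapping; the table contents below are hypothetical, not the actual vp9_coefband_trans_* arrays:

#include <stdio.h>

/* Hypothetical band layout: each position in scan order maps to a band. */
static const unsigned char band_translate[16] = {
  0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5
};

int main(void) {
  int c;
  for (c = 0; c < 16; ++c)   /* c is the position in scan order */
    printf("scan position %2d -> band %d\n", c, band_translate[c]);
  return 0;
}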
diff --git a/libvpx/vp9/encoder/x86/vp9_dct_sse2.c b/libvpx/vp9/encoder/x86/vp9_dct_sse2.c
index dc11501..fefca66 100644
--- a/libvpx/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/libvpx/vp9/encoder/x86/vp9_dct_sse2.c
@@ -206,12 +206,12 @@ void fadst4_1d_sse2(__m128i *in) {
const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
__m128i u[8], v[8];
__m128i in7 = _mm_add_epi16(in[0], in[1]);
- in7 = _mm_sub_epi16(in7, in[3]);
u[0] = _mm_unpacklo_epi16(in[0], in[1]);
u[1] = _mm_unpacklo_epi16(in[2], in[3]);
u[2] = _mm_unpacklo_epi16(in7, kZero);
u[3] = _mm_unpacklo_epi16(in[2], kZero);
+ u[4] = _mm_unpacklo_epi16(in[3], kZero);
v[0] = _mm_madd_epi16(u[0], k__sinpi_p01_p02); // s0 + s2
v[1] = _mm_madd_epi16(u[1], k__sinpi_p03_p04); // s4 + s5
@@ -219,9 +219,10 @@ void fadst4_1d_sse2(__m128i *in) {
v[3] = _mm_madd_epi16(u[0], k__sinpi_p04_m01); // s1 - s3
v[4] = _mm_madd_epi16(u[1], k__sinpi_m03_p02); // -s4 + s6
v[5] = _mm_madd_epi16(u[3], k__sinpi_p03_p03); // s4
+ v[6] = _mm_madd_epi16(u[4], k__sinpi_p03_p03);
u[0] = _mm_add_epi32(v[0], v[1]);
- u[1] = v[2];
+ u[1] = _mm_sub_epi32(v[2], v[6]);
u[2] = _mm_add_epi32(v[3], v[4]);
u[3] = _mm_sub_epi32(u[2], u[0]);
u[4] = _mm_slli_epi32(v[5], 2);
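
A hedged reading of the fadst4_1d_sse2 change: the old code formed in0 + in1 - in3 in 16-bit lanes before the widening multiply, while the new code subtracts the in[3] contribution after the multiply, in 32 bits. The standalone sketch below shows why the ordering matters for large inputs; the values are made up and sinpi_3_9 is quoted only for illustration.

#include <stdint.h>
#include <stdio.h>

int main(void) {
  const int16_t in0 = 15000, in1 = 15000, in3 = -5000;
  const int32_t sinpi_3_9 = 13377;  /* fixed-point sin(3*pi/9) constant */

  /* Old ordering: 16-bit sum first, then multiply - the sum exceeds the
   * int16_t range and wraps on typical targets (shown here with a cast). */
  const int16_t sum16 = (int16_t)(in0 + in1 - in3);
  const int32_t old_way = sum16 * sinpi_3_9;

  /* New ordering: multiply first, subtract the in3 term in 32 bits. */
  const int32_t new_way = (in0 + in1) * sinpi_3_9 - in3 * sinpi_3_9;

  printf("sum16=%d old=%d new=%d\n", (int)sum16, (int)old_way, (int)new_way);
  return 0;
}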
diff --git a/libvpx/vp9/encoder/x86/vp9_subpel_variance.asm b/libvpx/vp9/encoder/x86/vp9_subpel_variance.asm
index 533456b..1a9e4e8 100644
--- a/libvpx/vp9/encoder/x86/vp9_subpel_variance.asm
+++ b/libvpx/vp9/encoder/x86/vp9_subpel_variance.asm
@@ -118,6 +118,14 @@ SECTION .text
RET
%endmacro
+%macro INC_SRC_BY_SRC_STRIDE 0
+%if ARCH_X86=1 && CONFIG_PIC=1
+ add srcq, src_stridemp
+%else
+ add srcq, src_strideq
+%endif
+%endmacro
+
%macro SUBPEL_VARIANCE 1-2 0 ; W
%if cpuflag(ssse3)
%define bilin_filter_m bilin_filter_m_ssse3
@@ -129,41 +137,85 @@ SECTION .text
; FIXME(rbultje) only bilinear filters use >8 registers, and ssse3 only uses
; 11, not 13, if the registers are ordered correctly. May make a minor speed
; difference on Win64
-%ifdef PIC
-%if %2 == 1 ; avg
-cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, height, sse
-%define sec_str sec_strideq
-%else
-cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, x_offset, y_offset, \
- dst, dst_stride, height, sse
-%endif
-%define h heightd
-%define bilin_filter sseq
-%else
-%if %2 == 1 ; avg
-cglobal sub_pixel_avg_variance%1xh, 7 + 2 * ARCH_X86_64, \
- 7 + 2 * ARCH_X86_64, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, \
- height, sse
-%if ARCH_X86_64
-%define h heightd
-%define sec_str sec_strideq
-%else
-%define h dword heightm
-%define sec_str sec_stridemp
-%endif
+
+%ifdef PIC ; 64bit PIC
+ %if %2 == 1 ; avg
+ cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
+ x_offset, y_offset, \
+ dst, dst_stride, \
+ sec, sec_stride, height, sse
+ %define sec_str sec_strideq
+ %else
+ cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, x_offset, \
+ y_offset, dst, dst_stride, height, sse
+ %endif
+ %define h heightd
+ %define bilin_filter sseq
%else
-cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
- dst, dst_stride, height, sse
-%define h heightd
-%endif
-%define bilin_filter bilin_filter_m
+ %if ARCH_X86=1 && CONFIG_PIC=1
+ %if %2 == 1 ; avg
+ cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
+ x_offset, y_offset, \
+ dst, dst_stride, \
+ sec, sec_stride, \
+ height, sse, g_bilin_filter, g_pw_8
+ %define h dword heightm
+ %define sec_str sec_stridemp
+
+ ;Store bilin_filter and pw_8 locations on the stack
+ GET_GOT eax
+ add esp, 4 ; restore esp
+
+ lea ecx, [GLOBAL(bilin_filter_m)]
+ mov g_bilin_filterm, ecx
+
+ lea ecx, [GLOBAL(pw_8)]
+ mov g_pw_8m, ecx
+
+ LOAD_IF_USED 0, 1 ; load eax, ecx back
+ %else
+ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, \
+ y_offset, dst, dst_stride, height, sse, \
+ g_bilin_filter, g_pw_8
+ %define h heightd
+
+ ;Store bilin_filter and pw_8 locations on the stack
+ GET_GOT eax
+ add esp, 4 ; restore esp
+
+ lea ecx, [GLOBAL(bilin_filter_m)]
+ mov g_bilin_filterm, ecx
+
+ lea ecx, [GLOBAL(pw_8)]
+ mov g_pw_8m, ecx
+
+ LOAD_IF_USED 0, 1 ; load eax, ecx back
+ %endif
+ %else
+ %if %2 == 1 ; avg
+ cglobal sub_pixel_avg_variance%1xh, 7 + 2 * ARCH_X86_64, \
+ 7 + 2 * ARCH_X86_64, 13, src, src_stride, \
+ x_offset, y_offset, \
+ dst, dst_stride, \
+ sec, sec_stride, \
+ height, sse
+ %if ARCH_X86_64
+ %define h heightd
+ %define sec_str sec_strideq
+ %else
+ %define h dword heightm
+ %define sec_str sec_stridemp
+ %endif
+ %else
+ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, \
+ y_offset, dst, dst_stride, height, sse
+ %define h heightd
+ %endif
+
+ %define bilin_filter bilin_filter_m
+ %endif
%endif
+
ASSERT %1 <= 16 ; m6 overflows if w > 16
pxor m6, m6 ; sum
pxor m7, m7 ; sse
@@ -329,11 +381,22 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
%define filter_y_b m9
%define filter_rnd m10
%else ; x86-32 or mmx
+%if ARCH_X86=1 && CONFIG_PIC=1
+; x_offset == 0, reuse x_offset reg
+%define tempq x_offsetq
+ add y_offsetq, g_bilin_filterm
+%define filter_y_a [y_offsetq]
+%define filter_y_b [y_offsetq+16]
+ mov tempq, g_pw_8m
+%define filter_rnd [tempq]
+%else
add y_offsetq, bilin_filter
%define filter_y_a [y_offsetq]
%define filter_y_b [y_offsetq+16]
%define filter_rnd [pw_8]
%endif
+%endif
+
.x_zero_y_other_loop:
%if %1 == 16
movu m0, [srcq]
@@ -615,12 +678,23 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
%define filter_y_a m8
%define filter_y_b m9
%define filter_rnd m10
+%else ; x86-32
+%if ARCH_X86=1 && CONFIG_PIC=1
+; x_offset == 0.5. We can reuse x_offset reg
+%define tempq x_offsetq
+ add y_offsetq, g_bilin_filterm
+%define filter_y_a [y_offsetq]
+%define filter_y_b [y_offsetq+16]
+ mov tempq, g_pw_8m
+%define filter_rnd [tempq]
%else
add y_offsetq, bilin_filter
%define filter_y_a [y_offsetq]
%define filter_y_b [y_offsetq+16]
%define filter_rnd [pw_8]
%endif
+%endif
+
%if %1 == 16
movu m0, [srcq]
movu m3, [srcq+1]
@@ -752,12 +826,23 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
%define filter_x_a m8
%define filter_x_b m9
%define filter_rnd m10
+%else ; x86-32
+%if ARCH_X86=1 && CONFIG_PIC=1
+; y_offset == 0. We can reuse y_offset reg.
+%define tempq y_offsetq
+ add x_offsetq, g_bilin_filterm
+%define filter_x_a [x_offsetq]
+%define filter_x_b [x_offsetq+16]
+ mov tempq, g_pw_8m
+%define filter_rnd [tempq]
%else
add x_offsetq, bilin_filter
%define filter_x_a [x_offsetq]
%define filter_x_b [x_offsetq+16]
%define filter_rnd [pw_8]
%endif
+%endif
+
.x_other_y_zero_loop:
%if %1 == 16
movu m0, [srcq]
@@ -873,12 +958,23 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
%define filter_x_a m8
%define filter_x_b m9
%define filter_rnd m10
+%else ; x86-32
+%if ARCH_X86=1 && CONFIG_PIC=1
+; y_offset == 0.5. We can reuse y_offset reg.
+%define tempq y_offsetq
+ add x_offsetq, g_bilin_filterm
+%define filter_x_a [x_offsetq]
+%define filter_x_b [x_offsetq+16]
+ mov tempq, g_pw_8m
+%define filter_rnd [tempq]
%else
add x_offsetq, bilin_filter
%define filter_x_a [x_offsetq]
%define filter_x_b [x_offsetq+16]
%define filter_rnd [pw_8]
%endif
+%endif
+
%if %1 == 16
movu m0, [srcq]
movu m1, [srcq+1]
@@ -1057,6 +1153,21 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
%define filter_y_a m10
%define filter_y_b m11
%define filter_rnd m12
+%else ; x86-32
+%if ARCH_X86=1 && CONFIG_PIC=1
+; In this case there is no unused register, so the src_stride register is used.
+; Later, src_stride has to be reloaded from the stack when it is needed.
+%define tempq src_strideq
+ mov tempq, g_bilin_filterm
+ add x_offsetq, tempq
+ add y_offsetq, tempq
+%define filter_x_a [x_offsetq]
+%define filter_x_b [x_offsetq+16]
+%define filter_y_a [y_offsetq]
+%define filter_y_b [y_offsetq+16]
+
+ mov tempq, g_pw_8m
+%define filter_rnd [tempq]
%else
add x_offsetq, bilin_filter
add y_offsetq, bilin_filter
@@ -1066,6 +1177,8 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
%define filter_y_b [y_offsetq+16]
%define filter_rnd [pw_8]
%endif
+%endif
+
; x_offset == bilin interpolation && y_offset == bilin interpolation
%if %1 == 16
movu m0, [srcq]
@@ -1093,7 +1206,9 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
%endif
psraw m0, 4
psraw m2, 4
- add srcq, src_strideq
+
+ INC_SRC_BY_SRC_STRIDE
+
packuswb m0, m2
.x_other_y_other_loop:
%if cpuflag(ssse3)
@@ -1163,7 +1278,7 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
SUM_SSE m0, m1, m2, m3, m6, m7
mova m0, m4
- add srcq, src_strideq
+ INC_SRC_BY_SRC_STRIDE
add dstq, dst_strideq
%else ; %1 < 16
movh m0, [srcq]
@@ -1184,12 +1299,17 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
%if cpuflag(ssse3)
packuswb m0, m0
%endif
- add srcq, src_strideq
+
+ INC_SRC_BY_SRC_STRIDE
+
.x_other_y_other_loop:
movh m2, [srcq]
movh m1, [srcq+1]
- movh m4, [srcq+src_strideq]
- movh m3, [srcq+src_strideq+1]
+
+ INC_SRC_BY_SRC_STRIDE
+ movh m4, [srcq]
+ movh m3, [srcq+1]
+
%if cpuflag(ssse3)
punpcklbw m2, m1
punpcklbw m4, m3
@@ -1253,7 +1373,7 @@ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
SUM_SSE m0, m1, m2, m3, m6, m7
mova m0, m4
- lea srcq, [srcq+src_strideq*2]
+ INC_SRC_BY_SRC_STRIDE
lea dstq, [dstq+dst_strideq*2]
%endif
%if %2 == 1 ; avg