summary | refs | log | tree | commit | diff | stats
path: root/libvpx/vp9/decoder
diff options
context:
space:
mode:
author hkuang <hkuang@google.com> 2013-08-06 11:07:19 -0700
committer Hangyu Kuang <hkuang@google.com> 2013-08-06 18:31:37 +0000
commit f3bed9137f66ef693bd406e43b17e9a1114f1e14 (patch)
tree cd1bea0cd923c6d125cb5b3e7b3404d7c2f70208 /libvpx/vp9/decoder
parent a8b927ab4f06e2fc0d16d9606b57672df9899ac1 (diff)
download android_external_libvpx-f3bed9137f66ef693bd406e43b17e9a1114f1e14.tar.gz
android_external_libvpx-f3bed9137f66ef693bd406e43b17e9a1114f1e14.tar.bz2
android_external_libvpx-f3bed9137f66ef693bd406e43b17e9a1114f1e14.zip
Roll latest libvpx into Android.
The latest libvpx just added initial multithreaded VP9 decoding support and more NEON optimizations. Checkout is from the master branch (hash: 33afddadb9af6569bd8296ef1d48d0511b651e9d).
Change-Id: I54be2f48bc033c00876b6b1d0a3ff1eccb92a2fa
Diffstat (limited to 'libvpx/vp9/decoder')
-rw-r--r-- libvpx/vp9/decoder/vp9_decodemv.c 446
-rw-r--r-- libvpx/vp9/decoder/vp9_decodemv.h 1
-rw-r--r-- libvpx/vp9/decoder/vp9_decodframe.c 76
-rw-r--r-- libvpx/vp9/decoder/vp9_detokenize.c 52
-rw-r--r-- libvpx/vp9/decoder/vp9_detokenize.h 1
-rw-r--r-- libvpx/vp9/decoder/vp9_idct_blk.c 23
-rw-r--r-- libvpx/vp9/decoder/vp9_onyxd_if.c 18
-rw-r--r-- libvpx/vp9/decoder/vp9_onyxd_int.h 5
-rw-r--r-- libvpx/vp9/decoder/vp9_thread.c 248
-rw-r--r-- libvpx/vp9/decoder/vp9_thread.h 93
-rw-r--r-- libvpx/vp9/decoder/vp9_treereader.h 1
11 files changed, 661 insertions, 303 deletions
diff --git a/libvpx/vp9/decoder/vp9_decodemv.c b/libvpx/vp9/decoder/vp9_decodemv.c
index 6f0044a..a3e2ad3 100644
--- a/libvpx/vp9/decoder/vp9_decodemv.c
+++ b/libvpx/vp9/decoder/vp9_decodemv.c
@@ -30,8 +30,12 @@ static MB_PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) {
return (MB_PREDICTION_MODE)treed_read(r, vp9_intra_mode_tree, p);
}
-static MB_PREDICTION_MODE read_inter_mode(vp9_reader *r, const vp9_prob *p) {
- return (MB_PREDICTION_MODE)treed_read(r, vp9_inter_mode_tree, p);
+static MB_PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, vp9_reader *r,
+ uint8_t context) {
+ MB_PREDICTION_MODE mode = treed_read(r, vp9_inter_mode_tree,
+ cm->fc.inter_mode_probs[context]);
+ ++cm->counts.inter_mode[context][inter_mode_offset(mode)];
+ return mode;
}
static int read_segment_id(vp9_reader *r, const struct segmentation *seg) {
@@ -43,9 +47,9 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
const uint8_t context = vp9_get_pred_context_tx_size(xd);
const vp9_prob *tx_probs = get_tx_probs(bsize, context, &cm->fc.tx_probs);
TX_SIZE tx_size = vp9_read(r, tx_probs[0]);
- if (tx_size != TX_4X4 && bsize >= BLOCK_SIZE_MB16X16) {
+ if (tx_size != TX_4X4 && bsize >= BLOCK_16X16) {
tx_size += vp9_read(r, tx_probs[1]);
- if (tx_size != TX_8X8 && bsize >= BLOCK_SIZE_SB32X32)
+ if (tx_size != TX_8X8 && bsize >= BLOCK_32X32)
tx_size += vp9_read(r, tx_probs[2]);
}
@@ -54,18 +58,18 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
}
static TX_SIZE read_tx_size(VP9D_COMP *pbi, TX_MODE tx_mode,
- BLOCK_SIZE_TYPE bsize, int select_cond,
+ BLOCK_SIZE_TYPE bsize, int allow_select,
vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
- if (tx_mode == TX_MODE_SELECT && bsize >= BLOCK_SIZE_SB8X8 && select_cond)
+ if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8)
return read_selected_tx_size(cm, xd, bsize, r);
- else if (tx_mode >= ALLOW_32X32 && bsize >= BLOCK_SIZE_SB32X32)
+ else if (tx_mode >= ALLOW_32X32 && bsize >= BLOCK_32X32)
return TX_32X32;
- else if (tx_mode >= ALLOW_16X16 && bsize >= BLOCK_SIZE_MB16X16)
+ else if (tx_mode >= ALLOW_16X16 && bsize >= BLOCK_16X16)
return TX_16X16;
- else if (tx_mode >= ALLOW_8X8 && bsize >= BLOCK_SIZE_SB8X8)
+ else if (tx_mode >= ALLOW_8X8 && bsize >= BLOCK_8X8)
return TX_8X8;
else
return TX_4X4;
@@ -146,8 +150,8 @@ static uint8_t read_skip_coeff(VP9D_COMP *pbi, int segment_id, vp9_reader *r) {
return skip_coeff;
}
-static void read_intra_mode_info(VP9D_COMP *pbi, MODE_INFO *m,
- int mi_row, int mi_col, vp9_reader *r) {
+static void read_intra_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *m,
+ int mi_row, int mi_col, vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
MB_MODE_INFO *const mbmi = &m->mbmi;
@@ -158,6 +162,7 @@ static void read_intra_mode_info(VP9D_COMP *pbi, MODE_INFO *m,
mbmi->mb_skip_coeff = read_skip_coeff(pbi, mbmi->segment_id, r);
mbmi->txfm_size = read_tx_size(pbi, cm->tx_mode, bsize, 1, r);
mbmi->ref_frame[0] = INTRA_FRAME;
+ mbmi->ref_frame[1] = NONE;
if (bsize >= BLOCK_SIZE_SB8X8) {
const MB_PREDICTION_MODE A = above_block_mode(m, 0, mis);
@@ -166,12 +171,12 @@ static void read_intra_mode_info(VP9D_COMP *pbi, MODE_INFO *m,
mbmi->mode = read_intra_mode(r, vp9_kf_y_mode_prob[A][L]);
} else {
// Only 4x4, 4x8, 8x4 blocks
- const int bw = 1 << b_width_log2(bsize);
- const int bh = 1 << b_height_log2(bsize);
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2
+ const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2
int idx, idy;
- for (idy = 0; idy < 2; idy += bh) {
- for (idx = 0; idx < 2; idx += bw) {
+ for (idy = 0; idy < 2; idy += num_4x4_h) {
+ for (idx = 0; idx < 2; idx += num_4x4_w) {
const int ib = idy * 2 + idx;
const MB_PREDICTION_MODE A = above_block_mode(m, ib, mis);
const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
@@ -179,9 +184,9 @@ static void read_intra_mode_info(VP9D_COMP *pbi, MODE_INFO *m,
const MB_PREDICTION_MODE b_mode = read_intra_mode(r,
vp9_kf_y_mode_prob[A][L]);
m->bmi[ib].as_mode = b_mode;
- if (bh == 2)
+ if (num_4x4_h == 2)
m->bmi[ib + 2].as_mode = b_mode;
- if (bw == 2)
+ if (num_4x4_w == 2)
m->bmi[ib + 1].as_mode = b_mode;
}
}
@@ -228,16 +233,16 @@ static int read_mv_component(vp9_reader *r,
static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref,
const nmv_context *ctx,
- nmv_context_counts *counts, int usehp) {
+ nmv_context_counts *counts, int allow_hp) {
const MV_JOINT_TYPE j = treed_read(r, vp9_mv_joint_tree, ctx->joints);
+ const int use_hp = allow_hp && vp9_use_mv_hp(ref);
MV diff = {0, 0};
- usehp = usehp && vp9_use_mv_hp(ref);
if (mv_joint_vertical(j))
- diff.row = read_mv_component(r, &ctx->comps[0], usehp);
+ diff.row = read_mv_component(r, &ctx->comps[0], use_hp);
if (mv_joint_horizontal(j))
- diff.col = read_mv_component(r, &ctx->comps[1], usehp);
+ diff.col = read_mv_component(r, &ctx->comps[1], use_hp);
vp9_inc_mv(&diff, counts);
@@ -245,29 +250,30 @@ static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref,
mv->col = ref->col + diff.col;
}
-static void update_mv(vp9_reader *r, vp9_prob *p, vp9_prob upd_p) {
- if (vp9_read(r, upd_p))
+static void update_mv(vp9_reader *r, vp9_prob *p) {
+ if (vp9_read(r, VP9_NMV_UPDATE_PROB))
*p = (vp9_read_literal(r, 7) << 1) | 1;
}
-static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int usehp) {
+static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int allow_hp) {
int i, j, k;
for (j = 0; j < MV_JOINTS - 1; ++j)
- update_mv(r, &mvc->joints[j], VP9_NMV_UPDATE_PROB);
+ update_mv(r, &mvc->joints[j]);
for (i = 0; i < 2; ++i) {
nmv_component *const comp = &mvc->comps[i];
- update_mv(r, &comp->sign, VP9_NMV_UPDATE_PROB);
+ update_mv(r, &comp->sign);
+
for (j = 0; j < MV_CLASSES - 1; ++j)
- update_mv(r, &comp->classes[j], VP9_NMV_UPDATE_PROB);
+ update_mv(r, &comp->classes[j]);
for (j = 0; j < CLASS0_SIZE - 1; ++j)
- update_mv(r, &comp->class0[j], VP9_NMV_UPDATE_PROB);
+ update_mv(r, &comp->class0[j]);
for (j = 0; j < MV_OFFSET_BITS; ++j)
- update_mv(r, &comp->bits[j], VP9_NMV_UPDATE_PROB);
+ update_mv(r, &comp->bits[j]);
}
for (i = 0; i < 2; ++i) {
@@ -275,23 +281,23 @@ static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int usehp) {
for (j = 0; j < CLASS0_SIZE; ++j)
for (k = 0; k < 3; ++k)
- update_mv(r, &comp->class0_fp[j][k], VP9_NMV_UPDATE_PROB);
+ update_mv(r, &comp->class0_fp[j][k]);
for (j = 0; j < 3; ++j)
- update_mv(r, &comp->fp[j], VP9_NMV_UPDATE_PROB);
+ update_mv(r, &comp->fp[j]);
}
- if (usehp) {
+ if (allow_hp) {
for (i = 0; i < 2; ++i) {
- update_mv(r, &mvc->comps[i].class0_hp, VP9_NMV_UPDATE_PROB);
- update_mv(r, &mvc->comps[i].hp, VP9_NMV_UPDATE_PROB);
+ update_mv(r, &mvc->comps[i].class0_hp);
+ update_mv(r, &mvc->comps[i].hp);
}
}
}
// Read the referncence frame
-static void read_ref_frame(VP9D_COMP *pbi, vp9_reader *r,
- int segment_id, MV_REFERENCE_FRAME ref_frame[2]) {
+static void read_ref_frames(VP9D_COMP *pbi, vp9_reader *r,
+ int segment_id, MV_REFERENCE_FRAME ref_frame[2]) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
FRAME_CONTEXT *const fc = &cm->fc;
@@ -320,18 +326,19 @@ static void read_ref_frame(VP9D_COMP *pbi, vp9_reader *r,
ref_frame[fix_ref_idx] = cm->comp_fixed_ref;
ref_frame[!fix_ref_idx] = cm->comp_var_ref[b];
} else {
- const int ref1_ctx = vp9_get_pred_context_single_ref_p1(xd);
- ref_frame[1] = NONE;
- if (vp9_read(r, fc->single_ref_prob[ref1_ctx][0])) {
- const int ref2_ctx = vp9_get_pred_context_single_ref_p2(xd);
- const int b = vp9_read(r, fc->single_ref_prob[ref2_ctx][1]);
- ref_frame[0] = b ? ALTREF_FRAME : GOLDEN_FRAME;
- counts->single_ref[ref1_ctx][0][1]++;
- counts->single_ref[ref2_ctx][1][b]++;
+ const int ctx0 = vp9_get_pred_context_single_ref_p1(xd);
+ const int bit0 = vp9_read(r, fc->single_ref_prob[ctx0][0]);
+ ++counts->single_ref[ctx0][0][bit0];
+ if (bit0) {
+ const int ctx1 = vp9_get_pred_context_single_ref_p2(xd);
+ const int bit1 = vp9_read(r, fc->single_ref_prob[ctx1][1]);
+ ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME;
+ ++counts->single_ref[ctx1][1][bit1];
} else {
ref_frame[0] = LAST_FRAME;
- counts->single_ref[ref1_ctx][0][0]++;
}
+
+ ref_frame[1] = NONE;
}
}
}
@@ -359,16 +366,6 @@ static INLINE COMPPREDMODE_TYPE read_comp_pred_mode(vp9_reader *r) {
return mode;
}
-static INLINE void assign_and_clamp_mv(int_mv *dst, const int_mv *src,
- int mb_to_left_edge,
- int mb_to_right_edge,
- int mb_to_top_edge,
- int mb_to_bottom_edge) {
- dst->as_int = src->as_int;
- clamp_mv(dst, mb_to_left_edge, mb_to_right_edge, mb_to_top_edge,
- mb_to_bottom_edge);
-}
-
static INLINE INTERPOLATIONFILTERTYPE read_switchable_filter_type(
VP9D_COMP *pbi, vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
@@ -380,32 +377,35 @@ static INLINE INTERPOLATIONFILTERTYPE read_switchable_filter_type(
return vp9_switchable_interp[index];
}
-static void read_intra_block_modes(VP9D_COMP *pbi, MODE_INFO *mi,
- vp9_reader *r) {
+static void read_intra_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
+ vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
MB_MODE_INFO *const mbmi = &mi->mbmi;
const BLOCK_SIZE_TYPE bsize = mi->mbmi.sb_type;
- const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
+
+ mbmi->ref_frame[0] = INTRA_FRAME;
+ mbmi->ref_frame[1] = NONE;
if (bsize >= BLOCK_SIZE_SB8X8) {
- const int size_group = MIN(3, MIN(bwl, bhl));
+ const int size_group = size_group_lookup[bsize];
mbmi->mode = read_intra_mode(r, cm->fc.y_mode_prob[size_group]);
cm->counts.y_mode[size_group][mbmi->mode]++;
} else {
// Only 4x4, 4x8, 8x4 blocks
- const int bw = 1 << bwl, bh = 1 << bhl;
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2
+ const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2
int idx, idy;
- for (idy = 0; idy < 2; idy += bh) {
- for (idx = 0; idx < 2; idx += bw) {
+ for (idy = 0; idy < 2; idy += num_4x4_h) {
+ for (idx = 0; idx < 2; idx += num_4x4_w) {
const int ib = idy * 2 + idx;
const int b_mode = read_intra_mode(r, cm->fc.y_mode_prob[0]);
mi->bmi[ib].as_mode = b_mode;
cm->counts.y_mode[0][b_mode]++;
- if (bh == 2)
+ if (num_4x4_h == 2)
mi->bmi[ib + 2].as_mode = b_mode;
- if (bw == 2)
+ if (num_4x4_w == 2)
mi->bmi[ib + 1].as_mode = b_mode;
}
}
@@ -416,203 +416,197 @@ static void read_intra_block_modes(VP9D_COMP *pbi, MODE_INFO *mi,
cm->counts.uv_mode[mbmi->mode][mbmi->uv_mode]++;
}
-static MV_REFERENCE_FRAME read_reference_frame(VP9D_COMP *pbi, int segment_id,
- vp9_reader *r) {
+static int read_is_inter_block(VP9D_COMP *pbi, int segment_id, vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
- MV_REFERENCE_FRAME ref;
- if (!vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_REF_FRAME)) {
- const int ctx = vp9_get_pred_context_intra_inter(xd);
- ref = (MV_REFERENCE_FRAME)
- vp9_read(r, vp9_get_pred_prob_intra_inter(cm, xd));
- cm->counts.intra_inter[ctx][ref != INTRA_FRAME]++;
+ if (vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_REF_FRAME)) {
+ return vp9_get_segdata(&xd->seg, segment_id, SEG_LVL_REF_FRAME) !=
+ INTRA_FRAME;
} else {
- ref = (MV_REFERENCE_FRAME) vp9_get_segdata(&xd->seg, segment_id,
- SEG_LVL_REF_FRAME) != INTRA_FRAME;
+ const int ctx = vp9_get_pred_context_intra_inter(xd);
+ const int is_inter = vp9_read(r, vp9_get_pred_prob_intra_inter(cm, xd));
+ ++cm->counts.intra_inter[ctx][is_inter];
+ return is_inter;
}
- return ref;
}
-static void read_inter_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
- int mi_row, int mi_col, vp9_reader *r) {
+static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
+ int mi_row, int mi_col, vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
nmv_context *const nmvc = &cm->fc.nmvc;
MB_MODE_INFO *const mbmi = &mi->mbmi;
-
int_mv *const mv0 = &mbmi->mv[0];
int_mv *const mv1 = &mbmi->mv[1];
- const BLOCK_SIZE_TYPE bsize = mi->mbmi.sb_type;
- const int bw = 1 << b_width_log2(bsize);
- const int bh = 1 << b_height_log2(bsize);
-
- int idx, idy;
+ const BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
+ const int allow_hp = xd->allow_high_precision_mv;
- mbmi->segment_id = read_inter_segment_id(pbi, mi_row, mi_col, r);
- mbmi->mb_skip_coeff = read_skip_coeff(pbi, mbmi->segment_id, r);
- mbmi->ref_frame[0] = read_reference_frame(pbi, mbmi->segment_id, r);
- mbmi->ref_frame[1] = NONE;
- mbmi->txfm_size = read_tx_size(pbi, cm->tx_mode, bsize,
- (!mbmi->mb_skip_coeff || mbmi->ref_frame[0] == INTRA_FRAME), r);
+ int_mv nearest, nearby, best_mv;
+ int_mv nearest_second, nearby_second, best_mv_second;
+ uint8_t inter_mode_ctx;
+ MV_REFERENCE_FRAME ref0, ref1;
- if (mbmi->ref_frame[0] != INTRA_FRAME) {
- int_mv nearest, nearby, best_mv;
- int_mv nearest_second, nearby_second, best_mv_second;
- vp9_prob *mv_ref_p;
- MV_REFERENCE_FRAME ref0, ref1;
+ read_ref_frames(pbi, r, mbmi->segment_id, mbmi->ref_frame);
+ ref0 = mbmi->ref_frame[0];
+ ref1 = mbmi->ref_frame[1];
- read_ref_frame(pbi, r, mbmi->segment_id, mbmi->ref_frame);
- ref0 = mbmi->ref_frame[0];
- ref1 = mbmi->ref_frame[1];
+ vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context,
+ ref0, mbmi->ref_mvs[ref0], cm->ref_frame_sign_bias,
+ mi_row, mi_col);
- vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context,
- ref0, mbmi->ref_mvs[ref0], cm->ref_frame_sign_bias);
+ inter_mode_ctx = mbmi->mb_mode_context[ref0];
- mv_ref_p = cm->fc.inter_mode_probs[mbmi->mb_mode_context[ref0]];
+ if (vp9_segfeature_active(&xd->seg, mbmi->segment_id, SEG_LVL_SKIP))
+ mbmi->mode = ZEROMV;
+ else if (bsize >= BLOCK_SIZE_SB8X8)
+ mbmi->mode = read_inter_mode(cm, r, inter_mode_ctx);
- if (vp9_segfeature_active(&xd->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
- mbmi->mode = ZEROMV;
- } else if (bsize >= BLOCK_SIZE_SB8X8) {
- mbmi->mode = read_inter_mode(r, mv_ref_p);
- vp9_accum_mv_refs(cm, mbmi->mode, mbmi->mb_mode_context[ref0]);
- }
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = DC_PRED;
- // nearest, nearby
- if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) {
- vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref0], &nearest, &nearby);
- best_mv.as_int = mbmi->ref_mvs[ref0][0].as_int;
- }
+ // nearest, nearby
+ if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) {
+ vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref0], &nearest, &nearby);
+ best_mv.as_int = mbmi->ref_mvs[ref0][0].as_int;
+ }
- mbmi->interp_filter = cm->mcomp_filter_type == SWITCHABLE
- ? read_switchable_filter_type(pbi, r)
- : cm->mcomp_filter_type;
+ mbmi->interp_filter = cm->mcomp_filter_type == SWITCHABLE
+ ? read_switchable_filter_type(pbi, r)
+ : cm->mcomp_filter_type;
- if (ref1 > INTRA_FRAME) {
- vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context,
- ref1, mbmi->ref_mvs[ref1], cm->ref_frame_sign_bias);
+ if (ref1 > INTRA_FRAME) {
+ vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context,
+ ref1, mbmi->ref_mvs[ref1], cm->ref_frame_sign_bias,
+ mi_row, mi_col);
- if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) {
- vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref1],
- &nearest_second, &nearby_second);
- best_mv_second.as_int = mbmi->ref_mvs[ref1][0].as_int;
- }
+ if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) {
+ vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref1],
+ &nearest_second, &nearby_second);
+ best_mv_second.as_int = mbmi->ref_mvs[ref1][0].as_int;
}
+ }
+ if (bsize < BLOCK_SIZE_SB8X8) {
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2
+ const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2
+ int idx, idy;
+ for (idy = 0; idy < 2; idy += num_4x4_h) {
+ for (idx = 0; idx < 2; idx += num_4x4_w) {
+ int_mv blockmv, secondmv;
+ const int j = idy * 2 + idx;
+ const int b_mode = read_inter_mode(cm, r, inter_mode_ctx);
- if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
- for (idy = 0; idy < 2; idy += bh) {
- for (idx = 0; idx < 2; idx += bw) {
- int_mv blockmv, secondmv;
- const int j = idy * 2 + idx;
- const int blockmode = read_inter_mode(r, mv_ref_p);
+ if (b_mode == NEARESTMV || b_mode == NEARMV) {
+ vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest, &nearby, j, 0,
+ mi_row, mi_col);
- vp9_accum_mv_refs(cm, blockmode, mbmi->mb_mode_context[ref0]);
- if (blockmode == NEARESTMV || blockmode == NEARMV) {
- vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest, &nearby, j, 0);
- if (ref1 > 0)
- vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest_second,
- &nearby_second, j, 1);
- }
-
- switch (blockmode) {
- case NEWMV:
- read_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc,
- &cm->counts.mv, xd->allow_high_precision_mv);
-
- if (ref1 > 0)
- read_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc,
- &cm->counts.mv, xd->allow_high_precision_mv);
- break;
- case NEARESTMV:
- blockmv.as_int = nearest.as_int;
- if (ref1 > 0)
- secondmv.as_int = nearest_second.as_int;
- break;
- case NEARMV:
- blockmv.as_int = nearby.as_int;
- if (ref1 > 0)
- secondmv.as_int = nearby_second.as_int;
- break;
- case ZEROMV:
- blockmv.as_int = 0;
- if (ref1 > 0)
- secondmv.as_int = 0;
- break;
- default:
- assert(!"Invalid inter mode value");
- }
- mi->bmi[j].as_mv[0].as_int = blockmv.as_int;
if (ref1 > 0)
- mi->bmi[j].as_mv[1].as_int = secondmv.as_int;
-
- if (bh == 2)
- mi->bmi[j + 2] = mi->bmi[j];
- if (bw == 2)
- mi->bmi[j + 1] = mi->bmi[j];
- mi->mbmi.mode = blockmode;
+ vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest_second,
+ &nearby_second, j, 1,
+ mi_row, mi_col);
}
- }
- mv0->as_int = mi->bmi[3].as_mv[0].as_int;
- mv1->as_int = mi->bmi[3].as_mv[1].as_int;
- } else {
- const int mb_to_top_edge = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
- const int mb_to_bottom_edge = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
- const int mb_to_left_edge = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
- const int mb_to_right_edge = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
-
- switch (mbmi->mode) {
- case NEARMV:
- // Clip "next_nearest" so that it does not extend to far out of image
- assign_and_clamp_mv(mv0, &nearby, mb_to_left_edge,
- mb_to_right_edge,
- mb_to_top_edge,
- mb_to_bottom_edge);
- if (ref1 > 0)
- assign_and_clamp_mv(mv1, &nearby_second, mb_to_left_edge,
- mb_to_right_edge,
- mb_to_top_edge,
- mb_to_bottom_edge);
- break;
-
- case NEARESTMV:
- // Clip "next_nearest" so that it does not extend to far out of image
- assign_and_clamp_mv(mv0, &nearest, mb_to_left_edge,
- mb_to_right_edge,
- mb_to_top_edge,
- mb_to_bottom_edge);
- if (ref1 > 0)
- assign_and_clamp_mv(mv1, &nearest_second, mb_to_left_edge,
- mb_to_right_edge,
- mb_to_top_edge,
- mb_to_bottom_edge);
- break;
-
- case ZEROMV:
- mv0->as_int = 0;
- if (ref1 > 0)
- mv1->as_int = 0;
- break;
+ switch (b_mode) {
+ case NEWMV:
+ read_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc,
+ &cm->counts.mv, allow_hp);
- case NEWMV:
- read_mv(r, &mv0->as_mv, &best_mv.as_mv, nmvc, &cm->counts.mv,
- xd->allow_high_precision_mv);
- if (ref1 > 0)
- read_mv(r, &mv1->as_mv, &best_mv_second.as_mv, nmvc,
- &cm->counts.mv, xd->allow_high_precision_mv);
- break;
- default:
- assert(!"Invalid inter mode value");
+ if (ref1 > 0)
+ read_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc,
+ &cm->counts.mv, allow_hp);
+ break;
+ case NEARESTMV:
+ blockmv.as_int = nearest.as_int;
+ if (ref1 > 0)
+ secondmv.as_int = nearest_second.as_int;
+ break;
+ case NEARMV:
+ blockmv.as_int = nearby.as_int;
+ if (ref1 > 0)
+ secondmv.as_int = nearby_second.as_int;
+ break;
+ case ZEROMV:
+ blockmv.as_int = 0;
+ if (ref1 > 0)
+ secondmv.as_int = 0;
+ break;
+ default:
+ assert(!"Invalid inter mode value");
+ }
+ mi->bmi[j].as_mv[0].as_int = blockmv.as_int;
+ if (ref1 > 0)
+ mi->bmi[j].as_mv[1].as_int = secondmv.as_int;
+
+ if (num_4x4_h == 2)
+ mi->bmi[j + 2] = mi->bmi[j];
+ if (num_4x4_w == 2)
+ mi->bmi[j + 1] = mi->bmi[j];
+ mi->mbmi.mode = b_mode;
}
}
+
+ mv0->as_int = mi->bmi[3].as_mv[0].as_int;
+ mv1->as_int = mi->bmi[3].as_mv[1].as_int;
} else {
- mv0->as_int = 0; // required for left and above block mv
- read_intra_block_modes(pbi, mi, r);
+ switch (mbmi->mode) {
+ case NEARMV:
+ mv0->as_int = nearby.as_int;
+ clamp_mv2(&mv0->as_mv, xd);
+
+ if (ref1 > 0) {
+ mv1->as_int = nearby_second.as_int;
+ clamp_mv2(&mv1->as_mv, xd);
+ }
+ break;
+
+ case NEARESTMV:
+ mv0->as_int = nearest.as_int;
+ clamp_mv2(&mv0->as_mv, xd);
+
+ if (ref1 > 0) {
+ mv1->as_int = nearest_second.as_int;
+ clamp_mv2(&mv1->as_mv, xd);
+ }
+ break;
+
+ case ZEROMV:
+ mv0->as_int = 0;
+ if (ref1 > 0)
+ mv1->as_int = 0;
+ break;
+
+ case NEWMV:
+ read_mv(r, &mv0->as_mv, &best_mv.as_mv, nmvc, &cm->counts.mv, allow_hp);
+ if (ref1 > 0)
+ read_mv(r, &mv1->as_mv, &best_mv_second.as_mv, nmvc, &cm->counts.mv,
+ allow_hp);
+ break;
+ default:
+ assert(!"Invalid inter mode value");
+ }
}
}
+static void read_inter_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
+ int mi_row, int mi_col, vp9_reader *r) {
+ VP9_COMMON *const cm = &pbi->common;
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
+ int inter_block;
+
+ mbmi->mv[0].as_int = 0;
+ mbmi->mv[1].as_int = 0;
+ mbmi->segment_id = read_inter_segment_id(pbi, mi_row, mi_col, r);
+ mbmi->mb_skip_coeff = read_skip_coeff(pbi, mbmi->segment_id, r);
+ inter_block = read_is_inter_block(pbi, mbmi->segment_id, r);
+ mbmi->txfm_size = read_tx_size(pbi, cm->tx_mode, mbmi->sb_type,
+ !mbmi->mb_skip_coeff || !inter_block, r);
+
+ if (inter_block)
+ read_inter_block_mode_info(pbi, mi, mi_row, mi_col, r);
+ else
+ read_intra_block_mode_info(pbi, mi, r);
+}
+
static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) {
int i;
@@ -690,9 +684,9 @@ void vp9_read_mode_info(VP9D_COMP* pbi, int mi_row, int mi_col, vp9_reader *r) {
int x, y;
if (cm->frame_type == KEY_FRAME || cm->intra_only)
- read_intra_mode_info(pbi, mi, mi_row, mi_col, r);
+ read_intra_frame_mode_info(pbi, mi, mi_row, mi_col, r);
else
- read_inter_mode_info(pbi, mi, mi_row, mi_col, r);
+ read_inter_frame_mode_info(pbi, mi, mi_row, mi_col, r);
for (y = 0; y < y_mis; y++)
for (x = !y; x < x_mis; x++)
diff --git a/libvpx/vp9/decoder/vp9_decodemv.h b/libvpx/vp9/decoder/vp9_decodemv.h
index 4073d9e..462d2e3 100644
--- a/libvpx/vp9/decoder/vp9_decodemv.h
+++ b/libvpx/vp9/decoder/vp9_decodemv.h
@@ -12,6 +12,7 @@
#define VP9_DECODER_VP9_DECODEMV_H_
#include "vp9/decoder/vp9_onyxd_int.h"
+#include "vp9/decoder/vp9_dboolhuff.h"
void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r);
diff --git a/libvpx/vp9/decoder/vp9_decodframe.c b/libvpx/vp9/decoder/vp9_decodframe.c
index ffec8ea..feb6024 100644
--- a/libvpx/vp9/decoder/vp9_decodframe.c
+++ b/libvpx/vp9/decoder/vp9_decodframe.c
@@ -31,8 +31,11 @@
#include "vp9/decoder/vp9_detokenize.h"
#include "vp9/decoder/vp9_decodemv.h"
#include "vp9/decoder/vp9_dsubexp.h"
+#include "vp9/decoder/vp9_idct_blk.h"
#include "vp9/decoder/vp9_onyxd_int.h"
#include "vp9/decoder/vp9_read_bit_buffer.h"
+#include "vp9/decoder/vp9_thread.h"
+#include "vp9/decoder/vp9_treereader.h"
static int read_be32(const uint8_t *p) {
return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
@@ -59,17 +62,17 @@ static void read_tx_probs(struct tx_probs *tx_probs, vp9_reader *r) {
int i, j;
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
- for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j)
+ for (j = 0; j < TX_SIZES - 3; ++j)
if (vp9_read(r, VP9_MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]);
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
- for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j)
+ for (j = 0; j < TX_SIZES - 2; ++j)
if (vp9_read(r, VP9_MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]);
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
- for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j)
+ for (j = 0; j < TX_SIZES - 1; ++j)
if (vp9_read(r, VP9_MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]);
}
@@ -138,8 +141,8 @@ static void decode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
const int mode = plane == 0 ? mi->mbmi.mode
: mi->mbmi.uv_mode;
- if (plane == 0 && mi->mbmi.sb_type < BLOCK_SIZE_SB8X8) {
- assert(bsize == BLOCK_SIZE_SB8X8);
+ if (plane == 0 && mi->mbmi.sb_type < BLOCK_8X8) {
+ assert(bsize == BLOCK_8X8);
b_mode = mi->bmi[raster_block].as_mode;
} else {
b_mode = mode;
@@ -223,7 +226,7 @@ static void decode_modes_b(VP9D_COMP *pbi, int mi_row, int mi_col,
vp9_reader *r, BLOCK_SIZE_TYPE bsize) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
- const int less8x8 = bsize < BLOCK_SIZE_SB8X8;
+ const int less8x8 = bsize < BLOCK_8X8;
MB_MODE_INFO *mbmi;
if (less8x8)
@@ -234,12 +237,12 @@ static void decode_modes_b(VP9D_COMP *pbi, int mi_row, int mi_col,
vp9_read_mode_info(pbi, mi_row, mi_col, r);
if (less8x8)
- bsize = BLOCK_SIZE_SB8X8;
+ bsize = BLOCK_8X8;
// Has to be called after set_offsets
mbmi = &xd->mode_info_context->mbmi;
- if (mbmi->ref_frame[0] == INTRA_FRAME) {
+ if (!is_inter_block(mbmi)) {
// Intra reconstruction
decode_tokens(pbi, bsize, r);
foreach_transformed_block(xd, bsize, decode_block_intra, xd);
@@ -280,12 +283,12 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col,
if (mi_row >= pc->mi_rows || mi_col >= pc->mi_cols)
return;
- if (bsize < BLOCK_SIZE_SB8X8) {
+ if (bsize < BLOCK_8X8) {
if (xd->ab_index != 0)
return;
} else {
int pl;
- const int idx = check_bsize_coverage(pc, xd, mi_row, mi_col, bsize);
+ const int idx = check_bsize_coverage(pc, mi_row, mi_col, bsize);
set_partition_seg_context(pc, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
@@ -332,8 +335,8 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col,
}
// update partition context
- if (bsize >= BLOCK_SIZE_SB8X8 &&
- (bsize == BLOCK_SIZE_SB8X8 || partition != PARTITION_SPLIT)) {
+ if (bsize >= BLOCK_8X8 &&
+ (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) {
set_partition_seg_context(pc, xd, mi_row, mi_col);
update_partition_context(xd, subsize, bsize);
}
@@ -499,7 +502,7 @@ static INTERPOLATIONFILTERTYPE read_interp_filter_type(
: vp9_rb_read_literal(rb, 2);
}
-static void read_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb,
+static void read_frame_size(struct vp9_read_bit_buffer *rb,
int *width, int *height) {
const int w = vp9_rb_read_literal(rb, 16) + 1;
const int h = vp9_rb_read_literal(rb, 16) + 1;
@@ -507,12 +510,11 @@ static void read_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb,
*height = h;
}
-static void setup_display_size(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) {
- VP9_COMMON *const cm = &pbi->common;
+static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
cm->display_width = cm->width;
cm->display_height = cm->height;
if (vp9_rb_read_bit(rb))
- read_frame_size(cm, rb, &cm->display_width, &cm->display_height);
+ read_frame_size(rb, &cm->display_width, &cm->display_height);
}
static void apply_frame_size(VP9D_COMP *pbi, int width, int height) {
@@ -548,10 +550,9 @@ static void apply_frame_size(VP9D_COMP *pbi, int width, int height) {
static void setup_frame_size(VP9D_COMP *pbi,
struct vp9_read_bit_buffer *rb) {
- VP9_COMMON *const cm = &pbi->common;
int width, height;
- read_frame_size(cm, rb, &width, &height);
- setup_display_size(pbi, rb);
+ read_frame_size(rb, &width, &height);
+ setup_display_size(&pbi->common, rb);
apply_frame_size(pbi, width, height);
}
@@ -572,21 +573,29 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi,
}
if (!found)
- read_frame_size(cm, rb, &width, &height);
+ read_frame_size(rb, &width, &height);
if (!width || !height)
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Referenced frame with invalid size");
- setup_display_size(pbi, rb);
+ setup_display_size(cm, rb);
apply_frame_size(pbi, width, height);
}
static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
+ const int num_threads = pbi->oxcf.max_threads;
VP9_COMMON *const pc = &pbi->common;
int mi_row, mi_col;
if (pbi->do_loopfilter_inline) {
+ if (num_threads > 1) {
+ LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
+ lf_data->frame_buffer = &pbi->common.yv12_fb[pbi->common.new_fb_idx];
+ lf_data->cm = pc;
+ lf_data->xd = pbi->mb;
+ lf_data->y_only = 0;
+ }
vp9_loop_filter_frame_init(pc, &pbi->mb, pbi->mb.lf.filter_level);
}
@@ -597,21 +606,37 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
vpx_memset(pc->left_seg_context, 0, sizeof(pc->left_seg_context));
for (mi_col = pc->cur_tile_mi_col_start; mi_col < pc->cur_tile_mi_col_end;
mi_col += MI_BLOCK_SIZE) {
- decode_modes_sb(pbi, mi_row, mi_col, r, BLOCK_SIZE_SB64X64);
+ decode_modes_sb(pbi, mi_row, mi_col, r, BLOCK_64X64);
}
if (pbi->do_loopfilter_inline) {
- YV12_BUFFER_CONFIG *const fb =
- &pbi->common.yv12_fb[pbi->common.new_fb_idx];
// delay the loopfilter by 1 macroblock row.
const int lf_start = mi_row - MI_BLOCK_SIZE;
if (lf_start < 0) continue;
- vp9_loop_filter_rows(fb, pc, &pbi->mb, lf_start, mi_row, 0);
+
+ if (num_threads > 1) {
+ LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
+
+ vp9_worker_sync(&pbi->lf_worker);
+ lf_data->start = lf_start;
+ lf_data->stop = mi_row;
+ pbi->lf_worker.hook = vp9_loop_filter_worker;
+ vp9_worker_launch(&pbi->lf_worker);
+ } else {
+ YV12_BUFFER_CONFIG *const fb =
+ &pbi->common.yv12_fb[pbi->common.new_fb_idx];
+ vp9_loop_filter_rows(fb, pc, &pbi->mb, lf_start, mi_row, 0);
+ }
}
}
if (pbi->do_loopfilter_inline) {
YV12_BUFFER_CONFIG *const fb = &pbi->common.yv12_fb[pbi->common.new_fb_idx];
+ if (num_threads > 1) {
+ // TODO(jzern): since the loop filter is delayed one mb row, this will be
+ // forced to wait for the last row scheduled in the for loop.
+ vp9_worker_sync(&pbi->lf_worker);
+ }
vp9_loop_filter_rows(fb, pc, &pbi->mb,
mi_row - MI_BLOCK_SIZE, pc->mi_rows, 0);
}
@@ -994,7 +1019,6 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
if (!keyframe && !pc->intra_only) {
vp9_adapt_mode_probs(pc);
- vp9_adapt_mode_context(pc);
vp9_adapt_mv_probs(pc, xd->allow_high_precision_mv);
}
}
diff --git a/libvpx/vp9/decoder/vp9_detokenize.c b/libvpx/vp9/decoder/vp9_detokenize.c
index 01c1db0..0021643 100644
--- a/libvpx/vp9/decoder/vp9_detokenize.c
+++ b/libvpx/vp9/decoder/vp9_detokenize.c
@@ -15,8 +15,10 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_seg_common.h"
+#include "vp9/decoder/vp9_dboolhuff.h"
#include "vp9/decoder/vp9_detokenize.h"
#include "vp9/decoder/vp9_onyxd_int.h"
+#include "vp9/decoder/vp9_treereader.h"
#define EOB_CONTEXT_NODE 0
#define ZERO_CONTEXT_NODE 1
@@ -73,7 +75,7 @@ DECLARE_ALIGNED(16, extern const uint8_t,
#define WRITE_COEF_CONTINUE(val, token) \
{ \
qcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(r, val) * \
- dq[c > 0] / (1 + (txfm_size == TX_32X32)); \
+ dq[c > 0] / (1 + (tx_size == TX_32X32)); \
INCREMENT_COUNT(token); \
c++; \
continue; \
@@ -88,33 +90,24 @@ DECLARE_ALIGNED(16, extern const uint8_t,
static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
vp9_reader *r, int block_idx,
PLANE_TYPE type, int seg_eob, int16_t *qcoeff_ptr,
- TX_SIZE txfm_size, const int16_t *dq,
+ TX_SIZE tx_size, const int16_t *dq,
ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L) {
FRAME_CONTEXT *const fc = &cm->fc;
FRAME_COUNTS *const counts = &cm->counts;
ENTROPY_CONTEXT above_ec, left_ec;
- int pt, c = 0;
- int band;
- vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES];
+ const int ref = is_inter_block(&xd->mode_info_context->mbmi);
+ int band, pt, c = 0;
+ vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES] =
+ fc->coef_probs[tx_size][type][ref];
vp9_prob coef_probs_full[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
- uint8_t load_map[COEF_BANDS][PREV_COEF_CONTEXTS] = {
- {0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0},
- };
-
+ uint8_t load_map[COEF_BANDS][PREV_COEF_CONTEXTS] = { { 0 } };
vp9_prob *prob;
- vp9_coeff_count_model *coef_counts;
- const int ref = xd->mode_info_context->mbmi.ref_frame[0] != INTRA_FRAME;
+ vp9_coeff_count_model *coef_counts = counts->coef[tx_size];
const int16_t *scan, *nb;
uint8_t token_cache[1024];
const uint8_t * band_translate;
- coef_probs = fc->coef_probs[txfm_size][type][ref];
- coef_counts = counts->coef[txfm_size];
- switch (txfm_size) {
+
+ switch (tx_size) {
default:
case TX_4X4: {
scan = get_scan_4x4(get_tx_type_4x4(type, xd, block_idx));
@@ -125,22 +118,22 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
}
case TX_8X8: {
scan = get_scan_8x8(get_tx_type_8x8(type, xd));
- above_ec = (A[0] + A[1]) != 0;
- left_ec = (L[0] + L[1]) != 0;
+ above_ec = !!*(uint16_t *)A;
+ left_ec = !!*(uint16_t *)L;
band_translate = vp9_coefband_trans_8x8plus;
break;
}
case TX_16X16: {
scan = get_scan_16x16(get_tx_type_16x16(type, xd));
- above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
- left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
+ above_ec = !!*(uint32_t *)A;
+ left_ec = !!*(uint32_t *)L;
band_translate = vp9_coefband_trans_8x8plus;
break;
}
case TX_32X32:
scan = vp9_default_scan_32x32;
- above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
- left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
+ above_ec = !!*(uint64_t *)A;
+ left_ec = !!*(uint64_t *)L;
band_translate = vp9_coefband_trans_8x8plus;
break;
}
@@ -157,7 +150,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
pt = get_coef_context(nb, token_cache, c);
band = get_coef_band(band_translate, c);
prob = coef_probs[band][pt];
- counts->eob_branch[txfm_size][type][ref][band][pt]++;
+ counts->eob_branch[tx_size][type][ref][band][pt]++;
if (!vp9_read(r, prob[EOB_CONTEXT_NODE]))
break;
@@ -276,7 +269,7 @@ static void decode_block(int plane, int block,
const int mod = bw - ss_tx_size - pd->subsampling_x;
const int aoff = (off & ((1 << mod) - 1)) << ss_tx_size;
const int loff = (off >> mod) << ss_tx_size;
-
+ const int tx_size_in_blocks = 1 << ss_tx_size;
ENTROPY_CONTEXT *A = pd->above_context + aoff;
ENTROPY_CONTEXT *L = pd->left_context + loff;
const int eob = decode_coefs(&arg->pbi->common, xd, arg->r, block,
@@ -285,10 +278,11 @@ static void decode_block(int plane, int block,
ss_tx_size, pd->dequant, A, L);
if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
- set_contexts_on_border(xd, bsize, plane, ss_tx_size, eob, aoff, loff, A, L);
+ set_contexts_on_border(xd, bsize, plane, tx_size_in_blocks, eob, aoff, loff,
+ A, L);
} else {
int pt;
- for (pt = 0; pt < (1 << ss_tx_size); pt++)
+ for (pt = 0; pt < tx_size_in_blocks; pt++)
A[pt] = L[pt] = eob > 0;
}
pd->eobs[block] = eob;
diff --git a/libvpx/vp9/decoder/vp9_detokenize.h b/libvpx/vp9/decoder/vp9_detokenize.h
index d46b596..f98fe8d 100644
--- a/libvpx/vp9/decoder/vp9_detokenize.h
+++ b/libvpx/vp9/decoder/vp9_detokenize.h
@@ -13,6 +13,7 @@
#define VP9_DECODER_VP9_DETOKENIZE_H_
#include "vp9/decoder/vp9_onyxd_int.h"
+#include "vp9/decoder/vp9_dboolhuff.h"
int vp9_decode_tokens(VP9D_COMP* pbi, vp9_reader *r, BLOCK_SIZE_TYPE bsize);
diff --git a/libvpx/vp9/decoder/vp9_idct_blk.c b/libvpx/vp9/decoder/vp9_idct_blk.c
index 0217919..395e636 100644
--- a/libvpx/vp9/decoder/vp9_idct_blk.c
+++ b/libvpx/vp9/decoder/vp9_idct_blk.c
@@ -93,15 +93,11 @@ void vp9_idct_add_8x8_c(int16_t *input, uint8_t *dest, int stride, int eob) {
if (eob) {
if (eob == 1) {
// DC only DCT coefficient
- int16_t in = input[0];
- int16_t out;
-
- // Note: the idct1 will need to be modified accordingly whenever
- // vp9_short_idct8x8_c() is modified.
- vp9_short_idct1_8x8_c(&in, &out);
+ vp9_short_idct8x8_1_add(input, dest, stride);
input[0] = 0;
-
- vp9_add_constant_residual_8x8(out, dest, stride);
+ } else if (eob <= 10) {
+ vp9_short_idct10_8x8_add(input, dest, stride);
+ vpx_memset(input, 0, 128);
} else {
vp9_short_idct8x8_add(input, dest, stride);
vpx_memset(input, 0, 128);
@@ -127,14 +123,11 @@ void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) {
if (eob) {
if (eob == 1) {
/* DC only DCT coefficient. */
- int16_t in = input[0];
- int16_t out;
- /* Note: the idct1 will need to be modified accordingly whenever
- * vp9_short_idct16x16() is modified. */
- vp9_short_idct1_16x16_c(&in, &out);
+ vp9_short_idct16x16_1_add(input, dest, stride);
input[0] = 0;
-
- vp9_add_constant_residual_16x16(out, dest, stride);
+ } else if (eob <= 10) {
+ vp9_short_idct10_16x16_add(input, dest, stride);
+ vpx_memset(input, 0, 512);
} else {
vp9_short_idct16x16_add(input, dest, stride);
vpx_memset(input, 0, 512);
diff --git a/libvpx/vp9/decoder/vp9_onyxd_if.c b/libvpx/vp9/decoder/vp9_onyxd_if.c
index cb72920..5a01dd7 100644
--- a/libvpx/vp9/decoder/vp9_onyxd_if.c
+++ b/libvpx/vp9/decoder/vp9_onyxd_if.c
@@ -8,9 +8,9 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
-#include <stdio.h>
#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
#include "vp9/common/vp9_onyxc_int.h"
#if CONFIG_POSTPROC
@@ -114,7 +114,7 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) {
if (!pbi)
return NULL;
- vpx_memset(pbi, 0, sizeof(VP9D_COMP));
+ vp9_zero(*pbi);
if (setjmp(pbi->common.error.jmp)) {
pbi->common.error.setjmp = 0;
@@ -141,6 +141,16 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) {
pbi->common.error.setjmp = 0;
pbi->decoded_key_frame = 0;
+ if (pbi->oxcf.max_threads > 1) {
+ vp9_worker_init(&pbi->lf_worker);
+ pbi->lf_worker.data1 = vpx_malloc(sizeof(LFWorkerData));
+ pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
+ if (pbi->lf_worker.data1 == NULL || !vp9_worker_reset(&pbi->lf_worker)) {
+ vp9_remove_decompressor(pbi);
+ return NULL;
+ }
+ }
+
return pbi;
}
@@ -154,6 +164,8 @@ void vp9_remove_decompressor(VP9D_PTR ptr) {
vpx_free(pbi->common.last_frame_seg_map);
vp9_remove_common(&pbi->common);
+ vp9_worker_end(&pbi->lf_worker);
+ vpx_free(pbi->lf_worker.data1);
vpx_free(pbi);
}
diff --git a/libvpx/vp9/decoder/vp9_onyxd_int.h b/libvpx/vp9/decoder/vp9_onyxd_int.h
index 4760066..a051971 100644
--- a/libvpx/vp9/decoder/vp9_onyxd_int.h
+++ b/libvpx/vp9/decoder/vp9_onyxd_int.h
@@ -14,10 +14,8 @@
#include "./vpx_config.h"
#include "vp9/common/vp9_onyxc_int.h"
-
-#include "vp9/decoder/vp9_idct_blk.h"
#include "vp9/decoder/vp9_onyxd.h"
-#include "vp9/decoder/vp9_treereader.h"
+#include "vp9/decoder/vp9_thread.h"
typedef struct VP9Decompressor {
DECLARE_ALIGNED(16, MACROBLOCKD, mb);
@@ -40,6 +38,7 @@ typedef struct VP9Decompressor {
int initial_height;
int do_loopfilter_inline; // apply loopfilter to available rows immediately
+ VP9Worker lf_worker;
} VP9D_COMP;
#endif // VP9_DECODER_VP9_TREEREADER_H_
diff --git a/libvpx/vp9/decoder/vp9_thread.c b/libvpx/vp9/decoder/vp9_thread.c
new file mode 100644
index 0000000..dc3b681
--- /dev/null
+++ b/libvpx/vp9/decoder/vp9_thread.c
@@ -0,0 +1,248 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Multi-threaded worker
+//
+// Original source:
+// http://git.chromium.org/webm/libwebp.git
+// 100644 blob eff8f2a8c20095aade3c292b0e9292dac6cb3587 src/utils/thread.c
+
+
+#include <assert.h>
+#include <string.h> // for memset()
+#include "./vp9_thread.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#if CONFIG_MULTITHREAD
+
+#if defined(_WIN32)
+
+//------------------------------------------------------------------------------
+// simplistic pthread emulation layer
+
+#include <process.h>
+
+// _beginthreadex requires __stdcall
+#define THREADFN unsigned int __stdcall
+#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val)
+// Minimal pthread_create() built on _beginthreadex(); 'attr' is ignored. Returns 0 on success, 1 on failure.
+static int pthread_create(pthread_t* const thread, const void* attr,
+ unsigned int (__stdcall *start)(void*), void* arg) {
+ (void)attr;
+ *thread = (pthread_t)_beginthreadex(NULL, /* void *security */
+ 0, /* unsigned stack_size */
+ start,
+ arg,
+ 0, /* unsigned initflag */
+ NULL); /* unsigned *thrdaddr */
+ if (*thread == NULL) return 1;  // no thread handle was returned
+ SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL);  // result is not checked
+ return 0;
+}
+
+// Emulates pthread_join(): blocks until 'thread' terminates and then closes
+// its handle. 'value_ptr' is ignored. Returns 0 on success and 1 when the
+// wait does not complete with WAIT_OBJECT_0 or the handle cannot be closed.
+static int pthread_join(pthread_t thread, void** value_ptr) {
+  (void)value_ptr;
+  if (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0) {
+    return 1;  // the handle is not closed when the wait fails
+  }
+  return CloseHandle(thread) == 0;
+}
+
+// Mutex: pthread_mutex_* emulated with Win32 critical-section calls. Every wrapper below returns 0.
+static int pthread_mutex_init(pthread_mutex_t* const mutex, void* mutexattr) {
+ (void)mutexattr;  // attributes are ignored
+ InitializeCriticalSection(mutex);
+ return 0;
+}
+
+static int pthread_mutex_lock(pthread_mutex_t* const mutex) {
+ EnterCriticalSection(mutex);  // blocks until the critical section is acquired
+ return 0;
+}
+
+static int pthread_mutex_unlock(pthread_mutex_t* const mutex) {
+ LeaveCriticalSection(mutex);
+ return 0;
+}
+
+static int pthread_mutex_destroy(pthread_mutex_t* const mutex) {
+ DeleteCriticalSection(mutex);
+ return 0;
+}
+
+// Condition
+// Releases the three kernel objects backing an emulated condition variable.
+// All three handles are closed even if an earlier close fails. Returns 0 when
+// every CloseHandle() call succeeds and 1 otherwise.
+static int pthread_cond_destroy(pthread_cond_t* const condition) {
+  const int waiting_closed = (CloseHandle(condition->waiting_sem_) != 0);
+  const int received_closed = (CloseHandle(condition->received_sem_) != 0);
+  const int event_closed = (CloseHandle(condition->signal_event_) != 0);
+  return !(waiting_closed & received_closed & event_closed);
+}
+// Creates the two semaphores and the event that emulate a condition variable; returns 0 on success, 1 on failure.
+static int pthread_cond_init(pthread_cond_t* const condition, void* cond_attr) {
+ (void)cond_attr;
+ condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL);  // initial count 0, maximum count 1
+ condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL);  // initial count 0, maximum count 1
+ condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);  // auto-reset, initially non-signaled
+ if (condition->waiting_sem_ == NULL ||
+ condition->received_sem_ == NULL ||
+ condition->signal_event_ == NULL) {
+ pthread_cond_destroy(condition);  // closes all three handles, including any that are NULL
+ return 1;
+ }
+ return 0;
+}
+
+// Emulates pthread_cond_signal(). If a thread has announced itself through
+// waiting_sem_ (see pthread_cond_wait), it is woken via signal_event_ and the
+// caller blocks until the waiter acknowledges through received_sem_. When no
+// thread is waiting the call does nothing.
+// Returns 0 on success (including "nobody was waiting") and 1 if setting the
+// event or waiting for the acknowledgement failed.
+static int pthread_cond_signal(pthread_cond_t* const condition) {
+  int ok = 1;
+  if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
+    // a thread is waiting in pthread_cond_wait: allow it to be notified
+    // (normalize the BOOL result so the bitwise AND below is well defined)
+    ok = (SetEvent(condition->signal_event_) != 0);
+    // wait until the event is consumed so the signaler cannot consume
+    // the event via its own pthread_cond_wait. A completed wait
+    // (WAIT_OBJECT_0) is the success case.
+    ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) ==
+           WAIT_OBJECT_0);
+  }
+  return !ok;
+}
+// Emulates pthread_cond_wait(): releases 'mutex', waits for a signal, then re-acquires 'mutex'. Returns 0 on success.
+static int pthread_cond_wait(pthread_cond_t* const condition,
+ pthread_mutex_t* const mutex) {
+ int ok;
+ // note that there is a consumer available so the signal isn't dropped in
+ // pthread_cond_signal
+ if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL))
+ return 1;  // early exit: 'mutex' has not been released on this path
+ // now unlock the mutex so pthread_cond_signal may be issued
+ pthread_mutex_unlock(mutex);
+ ok = (WaitForSingleObject(condition->signal_event_, INFINITE) ==
+ WAIT_OBJECT_0);
+ ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL);  // acknowledge so pthread_cond_signal() can return
+ pthread_mutex_lock(mutex);
+ return !ok;
+}
+
+#else // _WIN32
+# define THREADFN void*
+# define THREAD_RETURN(val) val
+#endif
+
+//------------------------------------------------------------------------------
+// Worker thread entry point: idles while status_ is OK, runs the hook when it is WORK, and exits when it is NOT_OK.
+static THREADFN thread_loop(void *ptr) { // 'ptr' is the VP9Worker passed to pthread_create()
+ VP9Worker* const worker = (VP9Worker*)ptr;
+ int done = 0;
+ while (!done) {
+ pthread_mutex_lock(&worker->mutex_);
+ while (worker->status_ == OK) { // wait in idling mode
+ pthread_cond_wait(&worker->condition_, &worker->mutex_);
+ }
+ if (worker->status_ == WORK) {
+ if (worker->hook) {
+ worker->had_error |= !worker->hook(worker->data1, worker->data2);  // hook runs with mutex_ held; a zero return sets had_error
+ }
+ worker->status_ = OK;  // back to idle so change_state() can proceed
+ } else if (worker->status_ == NOT_OK) { // finish the worker
+ done = 1;
+ }
+ // signal to the main thread that we're done (for Sync())
+ pthread_cond_signal(&worker->condition_);
+ pthread_mutex_unlock(&worker->mutex_);
+ }
+ return THREAD_RETURN(NULL); // Thread is finished
+}
+
+// Main-thread state control: waits until the worker is idle (status_ == OK), then, unless new_status is OK, stores new_status and wakes the worker.
+static void change_state(VP9Worker* const worker,
+ VP9WorkerStatus new_status) {
+ // no-op when attempting to change state on a thread that didn't come up
+ if (worker->status_ < OK) return;  // status_ is read here without holding mutex_
+
+ pthread_mutex_lock(&worker->mutex_);
+ // wait for the worker to finish
+ while (worker->status_ != OK) {
+ pthread_cond_wait(&worker->condition_, &worker->mutex_);
+ }
+ // assign new status and release the working thread if needed
+ if (new_status != OK) {  // new_status == OK means "just synchronize"
+ worker->status_ = new_status;
+ pthread_cond_signal(&worker->condition_);
+ }
+ pthread_mutex_unlock(&worker->mutex_);
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// Zeroes the whole worker object and marks it NOT_OK; no thread or synchronization object is created here.
+void vp9_worker_init(VP9Worker* const worker) {
+ memset(worker, 0, sizeof(*worker));  // also clears hook, data1, data2 and had_error
+ worker->status_ = NOT_OK;
+}
+// Waits for any launched work to finish; returns non-zero when worker->had_error is not set.
+int vp9_worker_sync(VP9Worker* const worker) {
+#if CONFIG_MULTITHREAD
+ change_state(worker, OK);  // blocks until status_ is OK; returns at once if status_ is NOT_OK
+#endif
+ assert(worker->status_ <= OK);
+ return !worker->had_error;
+}
+
+int vp9_worker_reset(VP9Worker* const worker) {
+ int ok = 1;
+ worker->had_error = 0;
+ if (worker->status_ < OK) {
+#if CONFIG_MULTITHREAD
+ if (pthread_mutex_init(&worker->mutex_, NULL) ||
+ pthread_cond_init(&worker->condition_, NULL)) {
+ return 0;
+ }
+ pthread_mutex_lock(&worker->mutex_);
+ ok = !pthread_create(&worker->thread_, NULL, thread_loop, worker);
+ if (ok) worker->status_ = OK;
+ pthread_mutex_unlock(&worker->mutex_);
+#else
+ worker->status_ = OK;
+#endif
+ } else if (worker->status_ > OK) {
+ ok = vp9_worker_sync(worker);
+ }
+ assert(!ok || (worker->status_ == OK));
+ return ok;
+}
+
+// Starts one run of worker->hook(data1, data2). With CONFIG_MULTITHREAD the
+// request is handed over through change_state(worker, WORK). Otherwise the
+// hook, if set, is called directly on the calling thread, and a zero return
+// value sets worker->had_error.
+void vp9_worker_launch(VP9Worker* const worker) {
+#if CONFIG_MULTITHREAD
+  change_state(worker, WORK);
+#else
+  if (worker->hook != NULL) {
+    const int hook_ok = worker->hook(worker->data1, worker->data2);
+    if (!hook_ok) worker->had_error |= 1;
+  }
+#endif
+}
+// Shuts the worker down: stops and joins the thread if one was started, releases its mutex/condition, and leaves status_ as NOT_OK.
+void vp9_worker_end(VP9Worker* const worker) {
+ if (worker->status_ >= OK) {  // nothing to release for a worker that never came up
+#if CONFIG_MULTITHREAD
+ change_state(worker, NOT_OK);  // waits for pending work, then makes thread_loop() exit
+ pthread_join(worker->thread_, NULL);
+ pthread_mutex_destroy(&worker->mutex_);
+ pthread_cond_destroy(&worker->condition_);
+#else
+ worker->status_ = NOT_OK;
+#endif
+ }
+ assert(worker->status_ == NOT_OK);
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
diff --git a/libvpx/vp9/decoder/vp9_thread.h b/libvpx/vp9/decoder/vp9_thread.h
new file mode 100644
index 0000000..a8f7e04
--- /dev/null
+++ b/libvpx/vp9/decoder/vp9_thread.h
@@ -0,0 +1,93 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Multi-threaded worker
+//
+// Original source:
+// http://git.chromium.org/webm/libwebp.git
+// 100644 blob 13a61a4c84194c3374080cbf03d881d3cd6af40d src/utils/thread.h
+
+
+#ifndef VP9_DECODER_VP9_THREAD_H_
+#define VP9_DECODER_VP9_THREAD_H_
+
+#include "vpx_config.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#if CONFIG_MULTITHREAD
+
+#if defined(_WIN32)
+
+#include <windows.h>
+typedef HANDLE pthread_t;  // Win32 thread handle standing in for a pthread id
+typedef CRITICAL_SECTION pthread_mutex_t;  // mutex emulated with a critical section
+typedef struct {
+ HANDLE waiting_sem_;  // released by a waiter to announce itself to the signaler
+ HANDLE received_sem_;  // released by a waiter after it has consumed the signal
+ HANDLE signal_event_;  // set by the signaler to wake the waiter
+} pthread_cond_t;
+
+#else
+
+#include <pthread.h>
+
+#endif /* _WIN32 */
+#endif /* CONFIG_MULTITHREAD */
+
+// State of the worker thread object. The order NOT_OK < OK < WORK matters: vp9_thread.c compares status_ with < and >.
+typedef enum {
+ NOT_OK = 0, // object is unusable
+ OK, // ready to work
+ WORK // busy finishing the current task
+} VP9WorkerStatus;
+
+// Function to be called by the worker thread. Takes two opaque pointers as
+// arguments (data1 and data2), and should return zero (false) on error.
+typedef int (*VP9WorkerHook)(void*, void*);
+
+// Synchronization object used to launch a job in the worker thread.
+typedef struct {
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t mutex_;  // locked around status_ hand-offs between the two threads
+ pthread_cond_t condition_;  // signaled both to start work and to report completion
+ pthread_t thread_;  // the worker thread, created by vp9_worker_reset()
+#endif
+ VP9WorkerStatus status_;  // current state; NOT_OK after vp9_worker_init()
+ VP9WorkerHook hook; // hook to call
+ void* data1; // first argument passed to 'hook'
+ void* data2; // second argument passed to 'hook'
+ int had_error; // non-zero once any 'hook' call returned 0; cleared by vp9_worker_reset()
+} VP9Worker;
+
+// Must be called first, before any other function below.
+void vp9_worker_init(VP9Worker* const worker);
+// Must be called to initialize the object and spawn the thread. May be called
+// again on a live worker. Clears had_error. Returns zero in case of error.
+int vp9_worker_reset(VP9Worker* const worker);
+// Makes sure the previous work is finished. Returns non-zero if
+// worker->had_error is not set, i.e. no hook call has reported an error.
+int vp9_worker_sync(VP9Worker* const worker);
+// Triggers the thread to call hook() with the data1 and data2 arguments.
+// hook/data1/data2 may be changed at any time before calling this function,
+// but must not be changed afterward until the next vp9_worker_sync() call.
+void vp9_worker_launch(VP9Worker* const worker);
+// Kills the thread and terminates the object. To use the object again, one
+// must call vp9_worker_reset() again.
+void vp9_worker_end(VP9Worker* const worker);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif /* VP9_DECODER_VP9_THREAD_H_ */
diff --git a/libvpx/vp9/decoder/vp9_treereader.h b/libvpx/vp9/decoder/vp9_treereader.h
index 4535688..710cc4c 100644
--- a/libvpx/vp9/decoder/vp9_treereader.h
+++ b/libvpx/vp9/decoder/vp9_treereader.h
@@ -15,7 +15,6 @@
#include "vp9/common/vp9_treecoder.h"
#include "vp9/decoder/vp9_dboolhuff.h"
-#define vp9_read_prob(r) ((vp9_prob)vp9_read_literal(r, 8))
#define vp9_read_and_apply_sign(r, value) (vp9_read_bit(r) ? -(value) : (value))
// Intent of tree data structure is to make decoding trivial.