diff options
author | hkuang <hkuang@google.com> | 2013-08-06 11:07:19 -0700 |
---|---|---|
committer | Hangyu Kuang <hkuang@google.com> | 2013-08-06 18:31:37 +0000 |
commit | f3bed9137f66ef693bd406e43b17e9a1114f1e14 (patch) | |
tree | cd1bea0cd923c6d125cb5b3e7b3404d7c2f70208 /libvpx/vp9/decoder | |
parent | a8b927ab4f06e2fc0d16d9606b57672df9899ac1 (diff) | |
download | android_external_libvpx-f3bed9137f66ef693bd406e43b17e9a1114f1e14.tar.gz android_external_libvpx-f3bed9137f66ef693bd406e43b17e9a1114f1e14.tar.bz2 android_external_libvpx-f3bed9137f66ef693bd406e43b17e9a1114f1e14.zip |
Roll latest libvpx into Android.
The latest libvpx adds initial multithreaded VP9 decoding support and more NEON optimizations.
Checkout is from the master branch (hash: 33afddadb9af6569bd8296ef1d48d0511b651e9d).
Change-Id: I54be2f48bc033c00876b6b1d0a3ff1eccb92a2fa
Diffstat (limited to 'libvpx/vp9/decoder')
-rw-r--r-- | libvpx/vp9/decoder/vp9_decodemv.c | 446 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_decodemv.h | 1 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_decodframe.c | 76 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_detokenize.c | 52 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_detokenize.h | 1 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_idct_blk.c | 23 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_onyxd_if.c | 18 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_onyxd_int.h | 5 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_thread.c | 248 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_thread.h | 93 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_treereader.h | 1 |
11 files changed, 661 insertions, 303 deletions
diff --git a/libvpx/vp9/decoder/vp9_decodemv.c b/libvpx/vp9/decoder/vp9_decodemv.c index 6f0044a..a3e2ad3 100644 --- a/libvpx/vp9/decoder/vp9_decodemv.c +++ b/libvpx/vp9/decoder/vp9_decodemv.c @@ -30,8 +30,12 @@ static MB_PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) { return (MB_PREDICTION_MODE)treed_read(r, vp9_intra_mode_tree, p); } -static MB_PREDICTION_MODE read_inter_mode(vp9_reader *r, const vp9_prob *p) { - return (MB_PREDICTION_MODE)treed_read(r, vp9_inter_mode_tree, p); +static MB_PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, vp9_reader *r, + uint8_t context) { + MB_PREDICTION_MODE mode = treed_read(r, vp9_inter_mode_tree, + cm->fc.inter_mode_probs[context]); + ++cm->counts.inter_mode[context][inter_mode_offset(mode)]; + return mode; } static int read_segment_id(vp9_reader *r, const struct segmentation *seg) { @@ -43,9 +47,9 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, const uint8_t context = vp9_get_pred_context_tx_size(xd); const vp9_prob *tx_probs = get_tx_probs(bsize, context, &cm->fc.tx_probs); TX_SIZE tx_size = vp9_read(r, tx_probs[0]); - if (tx_size != TX_4X4 && bsize >= BLOCK_SIZE_MB16X16) { + if (tx_size != TX_4X4 && bsize >= BLOCK_16X16) { tx_size += vp9_read(r, tx_probs[1]); - if (tx_size != TX_8X8 && bsize >= BLOCK_SIZE_SB32X32) + if (tx_size != TX_8X8 && bsize >= BLOCK_32X32) tx_size += vp9_read(r, tx_probs[2]); } @@ -54,18 +58,18 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, } static TX_SIZE read_tx_size(VP9D_COMP *pbi, TX_MODE tx_mode, - BLOCK_SIZE_TYPE bsize, int select_cond, + BLOCK_SIZE_TYPE bsize, int allow_select, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; - if (tx_mode == TX_MODE_SELECT && bsize >= BLOCK_SIZE_SB8X8 && select_cond) + if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8) return read_selected_tx_size(cm, xd, bsize, r); - else if (tx_mode >= ALLOW_32X32 && bsize >= BLOCK_SIZE_SB32X32) + 
else if (tx_mode >= ALLOW_32X32 && bsize >= BLOCK_32X32) return TX_32X32; - else if (tx_mode >= ALLOW_16X16 && bsize >= BLOCK_SIZE_MB16X16) + else if (tx_mode >= ALLOW_16X16 && bsize >= BLOCK_16X16) return TX_16X16; - else if (tx_mode >= ALLOW_8X8 && bsize >= BLOCK_SIZE_SB8X8) + else if (tx_mode >= ALLOW_8X8 && bsize >= BLOCK_8X8) return TX_8X8; else return TX_4X4; @@ -146,8 +150,8 @@ static uint8_t read_skip_coeff(VP9D_COMP *pbi, int segment_id, vp9_reader *r) { return skip_coeff; } -static void read_intra_mode_info(VP9D_COMP *pbi, MODE_INFO *m, - int mi_row, int mi_col, vp9_reader *r) { +static void read_intra_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *m, + int mi_row, int mi_col, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; MB_MODE_INFO *const mbmi = &m->mbmi; @@ -158,6 +162,7 @@ static void read_intra_mode_info(VP9D_COMP *pbi, MODE_INFO *m, mbmi->mb_skip_coeff = read_skip_coeff(pbi, mbmi->segment_id, r); mbmi->txfm_size = read_tx_size(pbi, cm->tx_mode, bsize, 1, r); mbmi->ref_frame[0] = INTRA_FRAME; + mbmi->ref_frame[1] = NONE; if (bsize >= BLOCK_SIZE_SB8X8) { const MB_PREDICTION_MODE A = above_block_mode(m, 0, mis); @@ -166,12 +171,12 @@ static void read_intra_mode_info(VP9D_COMP *pbi, MODE_INFO *m, mbmi->mode = read_intra_mode(r, vp9_kf_y_mode_prob[A][L]); } else { // Only 4x4, 4x8, 8x4 blocks - const int bw = 1 << b_width_log2(bsize); - const int bh = 1 << b_height_log2(bsize); + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2 + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2 int idx, idy; - for (idy = 0; idy < 2; idy += bh) { - for (idx = 0; idx < 2; idx += bw) { + for (idy = 0; idy < 2; idy += num_4x4_h) { + for (idx = 0; idx < 2; idx += num_4x4_w) { const int ib = idy * 2 + idx; const MB_PREDICTION_MODE A = above_block_mode(m, ib, mis); const MB_PREDICTION_MODE L = (xd->left_available || idx) ? 
@@ -179,9 +184,9 @@ static void read_intra_mode_info(VP9D_COMP *pbi, MODE_INFO *m, const MB_PREDICTION_MODE b_mode = read_intra_mode(r, vp9_kf_y_mode_prob[A][L]); m->bmi[ib].as_mode = b_mode; - if (bh == 2) + if (num_4x4_h == 2) m->bmi[ib + 2].as_mode = b_mode; - if (bw == 2) + if (num_4x4_w == 2) m->bmi[ib + 1].as_mode = b_mode; } } @@ -228,16 +233,16 @@ static int read_mv_component(vp9_reader *r, static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref, const nmv_context *ctx, - nmv_context_counts *counts, int usehp) { + nmv_context_counts *counts, int allow_hp) { const MV_JOINT_TYPE j = treed_read(r, vp9_mv_joint_tree, ctx->joints); + const int use_hp = allow_hp && vp9_use_mv_hp(ref); MV diff = {0, 0}; - usehp = usehp && vp9_use_mv_hp(ref); if (mv_joint_vertical(j)) - diff.row = read_mv_component(r, &ctx->comps[0], usehp); + diff.row = read_mv_component(r, &ctx->comps[0], use_hp); if (mv_joint_horizontal(j)) - diff.col = read_mv_component(r, &ctx->comps[1], usehp); + diff.col = read_mv_component(r, &ctx->comps[1], use_hp); vp9_inc_mv(&diff, counts); @@ -245,29 +250,30 @@ static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref, mv->col = ref->col + diff.col; } -static void update_mv(vp9_reader *r, vp9_prob *p, vp9_prob upd_p) { - if (vp9_read(r, upd_p)) +static void update_mv(vp9_reader *r, vp9_prob *p) { + if (vp9_read(r, VP9_NMV_UPDATE_PROB)) *p = (vp9_read_literal(r, 7) << 1) | 1; } -static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int usehp) { +static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int allow_hp) { int i, j, k; for (j = 0; j < MV_JOINTS - 1; ++j) - update_mv(r, &mvc->joints[j], VP9_NMV_UPDATE_PROB); + update_mv(r, &mvc->joints[j]); for (i = 0; i < 2; ++i) { nmv_component *const comp = &mvc->comps[i]; - update_mv(r, &comp->sign, VP9_NMV_UPDATE_PROB); + update_mv(r, &comp->sign); + for (j = 0; j < MV_CLASSES - 1; ++j) - update_mv(r, &comp->classes[j], VP9_NMV_UPDATE_PROB); + update_mv(r, &comp->classes[j]); for (j = 
0; j < CLASS0_SIZE - 1; ++j) - update_mv(r, &comp->class0[j], VP9_NMV_UPDATE_PROB); + update_mv(r, &comp->class0[j]); for (j = 0; j < MV_OFFSET_BITS; ++j) - update_mv(r, &comp->bits[j], VP9_NMV_UPDATE_PROB); + update_mv(r, &comp->bits[j]); } for (i = 0; i < 2; ++i) { @@ -275,23 +281,23 @@ static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int usehp) { for (j = 0; j < CLASS0_SIZE; ++j) for (k = 0; k < 3; ++k) - update_mv(r, &comp->class0_fp[j][k], VP9_NMV_UPDATE_PROB); + update_mv(r, &comp->class0_fp[j][k]); for (j = 0; j < 3; ++j) - update_mv(r, &comp->fp[j], VP9_NMV_UPDATE_PROB); + update_mv(r, &comp->fp[j]); } - if (usehp) { + if (allow_hp) { for (i = 0; i < 2; ++i) { - update_mv(r, &mvc->comps[i].class0_hp, VP9_NMV_UPDATE_PROB); - update_mv(r, &mvc->comps[i].hp, VP9_NMV_UPDATE_PROB); + update_mv(r, &mvc->comps[i].class0_hp); + update_mv(r, &mvc->comps[i].hp); } } } // Read the referncence frame -static void read_ref_frame(VP9D_COMP *pbi, vp9_reader *r, - int segment_id, MV_REFERENCE_FRAME ref_frame[2]) { +static void read_ref_frames(VP9D_COMP *pbi, vp9_reader *r, + int segment_id, MV_REFERENCE_FRAME ref_frame[2]) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; FRAME_CONTEXT *const fc = &cm->fc; @@ -320,18 +326,19 @@ static void read_ref_frame(VP9D_COMP *pbi, vp9_reader *r, ref_frame[fix_ref_idx] = cm->comp_fixed_ref; ref_frame[!fix_ref_idx] = cm->comp_var_ref[b]; } else { - const int ref1_ctx = vp9_get_pred_context_single_ref_p1(xd); - ref_frame[1] = NONE; - if (vp9_read(r, fc->single_ref_prob[ref1_ctx][0])) { - const int ref2_ctx = vp9_get_pred_context_single_ref_p2(xd); - const int b = vp9_read(r, fc->single_ref_prob[ref2_ctx][1]); - ref_frame[0] = b ? 
ALTREF_FRAME : GOLDEN_FRAME; - counts->single_ref[ref1_ctx][0][1]++; - counts->single_ref[ref2_ctx][1][b]++; + const int ctx0 = vp9_get_pred_context_single_ref_p1(xd); + const int bit0 = vp9_read(r, fc->single_ref_prob[ctx0][0]); + ++counts->single_ref[ctx0][0][bit0]; + if (bit0) { + const int ctx1 = vp9_get_pred_context_single_ref_p2(xd); + const int bit1 = vp9_read(r, fc->single_ref_prob[ctx1][1]); + ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME; + ++counts->single_ref[ctx1][1][bit1]; } else { ref_frame[0] = LAST_FRAME; - counts->single_ref[ref1_ctx][0][0]++; } + + ref_frame[1] = NONE; } } } @@ -359,16 +366,6 @@ static INLINE COMPPREDMODE_TYPE read_comp_pred_mode(vp9_reader *r) { return mode; } -static INLINE void assign_and_clamp_mv(int_mv *dst, const int_mv *src, - int mb_to_left_edge, - int mb_to_right_edge, - int mb_to_top_edge, - int mb_to_bottom_edge) { - dst->as_int = src->as_int; - clamp_mv(dst, mb_to_left_edge, mb_to_right_edge, mb_to_top_edge, - mb_to_bottom_edge); -} - static INLINE INTERPOLATIONFILTERTYPE read_switchable_filter_type( VP9D_COMP *pbi, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; @@ -380,32 +377,35 @@ static INLINE INTERPOLATIONFILTERTYPE read_switchable_filter_type( return vp9_switchable_interp[index]; } -static void read_intra_block_modes(VP9D_COMP *pbi, MODE_INFO *mi, - vp9_reader *r) { +static void read_intra_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, + vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; MB_MODE_INFO *const mbmi = &mi->mbmi; const BLOCK_SIZE_TYPE bsize = mi->mbmi.sb_type; - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); + + mbmi->ref_frame[0] = INTRA_FRAME; + mbmi->ref_frame[1] = NONE; if (bsize >= BLOCK_SIZE_SB8X8) { - const int size_group = MIN(3, MIN(bwl, bhl)); + const int size_group = size_group_lookup[bsize]; mbmi->mode = read_intra_mode(r, cm->fc.y_mode_prob[size_group]); cm->counts.y_mode[size_group][mbmi->mode]++; } else { // Only 4x4, 4x8, 8x4 blocks - const int bw = 1 
<< bwl, bh = 1 << bhl; + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2 + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2 int idx, idy; - for (idy = 0; idy < 2; idy += bh) { - for (idx = 0; idx < 2; idx += bw) { + for (idy = 0; idy < 2; idy += num_4x4_h) { + for (idx = 0; idx < 2; idx += num_4x4_w) { const int ib = idy * 2 + idx; const int b_mode = read_intra_mode(r, cm->fc.y_mode_prob[0]); mi->bmi[ib].as_mode = b_mode; cm->counts.y_mode[0][b_mode]++; - if (bh == 2) + if (num_4x4_h == 2) mi->bmi[ib + 2].as_mode = b_mode; - if (bw == 2) + if (num_4x4_w == 2) mi->bmi[ib + 1].as_mode = b_mode; } } @@ -416,203 +416,197 @@ static void read_intra_block_modes(VP9D_COMP *pbi, MODE_INFO *mi, cm->counts.uv_mode[mbmi->mode][mbmi->uv_mode]++; } -static MV_REFERENCE_FRAME read_reference_frame(VP9D_COMP *pbi, int segment_id, - vp9_reader *r) { +static int read_is_inter_block(VP9D_COMP *pbi, int segment_id, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; - MV_REFERENCE_FRAME ref; - if (!vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_REF_FRAME)) { - const int ctx = vp9_get_pred_context_intra_inter(xd); - ref = (MV_REFERENCE_FRAME) - vp9_read(r, vp9_get_pred_prob_intra_inter(cm, xd)); - cm->counts.intra_inter[ctx][ref != INTRA_FRAME]++; + if (vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_REF_FRAME)) { + return vp9_get_segdata(&xd->seg, segment_id, SEG_LVL_REF_FRAME) != + INTRA_FRAME; } else { - ref = (MV_REFERENCE_FRAME) vp9_get_segdata(&xd->seg, segment_id, - SEG_LVL_REF_FRAME) != INTRA_FRAME; + const int ctx = vp9_get_pred_context_intra_inter(xd); + const int is_inter = vp9_read(r, vp9_get_pred_prob_intra_inter(cm, xd)); + ++cm->counts.intra_inter[ctx][is_inter]; + return is_inter; } - return ref; } -static void read_inter_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, - int mi_row, int mi_col, vp9_reader *r) { +static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, + int mi_row, 
int mi_col, vp9_reader *r) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; nmv_context *const nmvc = &cm->fc.nmvc; MB_MODE_INFO *const mbmi = &mi->mbmi; - int_mv *const mv0 = &mbmi->mv[0]; int_mv *const mv1 = &mbmi->mv[1]; - const BLOCK_SIZE_TYPE bsize = mi->mbmi.sb_type; - const int bw = 1 << b_width_log2(bsize); - const int bh = 1 << b_height_log2(bsize); - - int idx, idy; + const BLOCK_SIZE_TYPE bsize = mbmi->sb_type; + const int allow_hp = xd->allow_high_precision_mv; - mbmi->segment_id = read_inter_segment_id(pbi, mi_row, mi_col, r); - mbmi->mb_skip_coeff = read_skip_coeff(pbi, mbmi->segment_id, r); - mbmi->ref_frame[0] = read_reference_frame(pbi, mbmi->segment_id, r); - mbmi->ref_frame[1] = NONE; - mbmi->txfm_size = read_tx_size(pbi, cm->tx_mode, bsize, - (!mbmi->mb_skip_coeff || mbmi->ref_frame[0] == INTRA_FRAME), r); + int_mv nearest, nearby, best_mv; + int_mv nearest_second, nearby_second, best_mv_second; + uint8_t inter_mode_ctx; + MV_REFERENCE_FRAME ref0, ref1; - if (mbmi->ref_frame[0] != INTRA_FRAME) { - int_mv nearest, nearby, best_mv; - int_mv nearest_second, nearby_second, best_mv_second; - vp9_prob *mv_ref_p; - MV_REFERENCE_FRAME ref0, ref1; + read_ref_frames(pbi, r, mbmi->segment_id, mbmi->ref_frame); + ref0 = mbmi->ref_frame[0]; + ref1 = mbmi->ref_frame[1]; - read_ref_frame(pbi, r, mbmi->segment_id, mbmi->ref_frame); - ref0 = mbmi->ref_frame[0]; - ref1 = mbmi->ref_frame[1]; + vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context, + ref0, mbmi->ref_mvs[ref0], cm->ref_frame_sign_bias, + mi_row, mi_col); - vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context, - ref0, mbmi->ref_mvs[ref0], cm->ref_frame_sign_bias); + inter_mode_ctx = mbmi->mb_mode_context[ref0]; - mv_ref_p = cm->fc.inter_mode_probs[mbmi->mb_mode_context[ref0]]; + if (vp9_segfeature_active(&xd->seg, mbmi->segment_id, SEG_LVL_SKIP)) + mbmi->mode = ZEROMV; + else if (bsize >= BLOCK_SIZE_SB8X8) + mbmi->mode = read_inter_mode(cm, r, inter_mode_ctx); - if 
(vp9_segfeature_active(&xd->seg, mbmi->segment_id, SEG_LVL_SKIP)) { - mbmi->mode = ZEROMV; - } else if (bsize >= BLOCK_SIZE_SB8X8) { - mbmi->mode = read_inter_mode(r, mv_ref_p); - vp9_accum_mv_refs(cm, mbmi->mode, mbmi->mb_mode_context[ref0]); - } - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = DC_PRED; - // nearest, nearby - if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) { - vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref0], &nearest, &nearby); - best_mv.as_int = mbmi->ref_mvs[ref0][0].as_int; - } + // nearest, nearby + if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) { + vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref0], &nearest, &nearby); + best_mv.as_int = mbmi->ref_mvs[ref0][0].as_int; + } - mbmi->interp_filter = cm->mcomp_filter_type == SWITCHABLE - ? read_switchable_filter_type(pbi, r) - : cm->mcomp_filter_type; + mbmi->interp_filter = cm->mcomp_filter_type == SWITCHABLE + ? read_switchable_filter_type(pbi, r) + : cm->mcomp_filter_type; - if (ref1 > INTRA_FRAME) { - vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context, - ref1, mbmi->ref_mvs[ref1], cm->ref_frame_sign_bias); + if (ref1 > INTRA_FRAME) { + vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context, + ref1, mbmi->ref_mvs[ref1], cm->ref_frame_sign_bias, + mi_row, mi_col); - if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) { - vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref1], - &nearest_second, &nearby_second); - best_mv_second.as_int = mbmi->ref_mvs[ref1][0].as_int; - } + if (bsize < BLOCK_SIZE_SB8X8 || mbmi->mode != ZEROMV) { + vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref1], + &nearest_second, &nearby_second); + best_mv_second.as_int = mbmi->ref_mvs[ref1][0].as_int; } + } + if (bsize < BLOCK_SIZE_SB8X8) { + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2 + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2 + int idx, idy; + for (idy = 0; idy < 2; idy += num_4x4_h) { + for (idx = 0; idx < 2; idx += num_4x4_w) { + int_mv blockmv, secondmv; + const int j = idy * 
2 + idx; + const int b_mode = read_inter_mode(cm, r, inter_mode_ctx); - if (mbmi->sb_type < BLOCK_SIZE_SB8X8) { - for (idy = 0; idy < 2; idy += bh) { - for (idx = 0; idx < 2; idx += bw) { - int_mv blockmv, secondmv; - const int j = idy * 2 + idx; - const int blockmode = read_inter_mode(r, mv_ref_p); + if (b_mode == NEARESTMV || b_mode == NEARMV) { + vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest, &nearby, j, 0, + mi_row, mi_col); - vp9_accum_mv_refs(cm, blockmode, mbmi->mb_mode_context[ref0]); - if (blockmode == NEARESTMV || blockmode == NEARMV) { - vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest, &nearby, j, 0); - if (ref1 > 0) - vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest_second, - &nearby_second, j, 1); - } - - switch (blockmode) { - case NEWMV: - read_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc, - &cm->counts.mv, xd->allow_high_precision_mv); - - if (ref1 > 0) - read_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc, - &cm->counts.mv, xd->allow_high_precision_mv); - break; - case NEARESTMV: - blockmv.as_int = nearest.as_int; - if (ref1 > 0) - secondmv.as_int = nearest_second.as_int; - break; - case NEARMV: - blockmv.as_int = nearby.as_int; - if (ref1 > 0) - secondmv.as_int = nearby_second.as_int; - break; - case ZEROMV: - blockmv.as_int = 0; - if (ref1 > 0) - secondmv.as_int = 0; - break; - default: - assert(!"Invalid inter mode value"); - } - mi->bmi[j].as_mv[0].as_int = blockmv.as_int; if (ref1 > 0) - mi->bmi[j].as_mv[1].as_int = secondmv.as_int; - - if (bh == 2) - mi->bmi[j + 2] = mi->bmi[j]; - if (bw == 2) - mi->bmi[j + 1] = mi->bmi[j]; - mi->mbmi.mode = blockmode; + vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest_second, + &nearby_second, j, 1, + mi_row, mi_col); } - } - mv0->as_int = mi->bmi[3].as_mv[0].as_int; - mv1->as_int = mi->bmi[3].as_mv[1].as_int; - } else { - const int mb_to_top_edge = xd->mb_to_top_edge - LEFT_TOP_MARGIN; - const int mb_to_bottom_edge = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN; - const int mb_to_left_edge = 
xd->mb_to_left_edge - LEFT_TOP_MARGIN; - const int mb_to_right_edge = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN; - - switch (mbmi->mode) { - case NEARMV: - // Clip "next_nearest" so that it does not extend to far out of image - assign_and_clamp_mv(mv0, &nearby, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - if (ref1 > 0) - assign_and_clamp_mv(mv1, &nearby_second, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - break; - - case NEARESTMV: - // Clip "next_nearest" so that it does not extend to far out of image - assign_and_clamp_mv(mv0, &nearest, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - if (ref1 > 0) - assign_and_clamp_mv(mv1, &nearest_second, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - break; - - case ZEROMV: - mv0->as_int = 0; - if (ref1 > 0) - mv1->as_int = 0; - break; + switch (b_mode) { + case NEWMV: + read_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc, + &cm->counts.mv, allow_hp); - case NEWMV: - read_mv(r, &mv0->as_mv, &best_mv.as_mv, nmvc, &cm->counts.mv, - xd->allow_high_precision_mv); - if (ref1 > 0) - read_mv(r, &mv1->as_mv, &best_mv_second.as_mv, nmvc, - &cm->counts.mv, xd->allow_high_precision_mv); - break; - default: - assert(!"Invalid inter mode value"); + if (ref1 > 0) + read_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc, + &cm->counts.mv, allow_hp); + break; + case NEARESTMV: + blockmv.as_int = nearest.as_int; + if (ref1 > 0) + secondmv.as_int = nearest_second.as_int; + break; + case NEARMV: + blockmv.as_int = nearby.as_int; + if (ref1 > 0) + secondmv.as_int = nearby_second.as_int; + break; + case ZEROMV: + blockmv.as_int = 0; + if (ref1 > 0) + secondmv.as_int = 0; + break; + default: + assert(!"Invalid inter mode value"); + } + mi->bmi[j].as_mv[0].as_int = blockmv.as_int; + if (ref1 > 0) + mi->bmi[j].as_mv[1].as_int = secondmv.as_int; + + if (num_4x4_h == 2) + mi->bmi[j + 2] = mi->bmi[j]; + if (num_4x4_w == 2) + 
mi->bmi[j + 1] = mi->bmi[j]; + mi->mbmi.mode = b_mode; } } + + mv0->as_int = mi->bmi[3].as_mv[0].as_int; + mv1->as_int = mi->bmi[3].as_mv[1].as_int; } else { - mv0->as_int = 0; // required for left and above block mv - read_intra_block_modes(pbi, mi, r); + switch (mbmi->mode) { + case NEARMV: + mv0->as_int = nearby.as_int; + clamp_mv2(&mv0->as_mv, xd); + + if (ref1 > 0) { + mv1->as_int = nearby_second.as_int; + clamp_mv2(&mv1->as_mv, xd); + } + break; + + case NEARESTMV: + mv0->as_int = nearest.as_int; + clamp_mv2(&mv0->as_mv, xd); + + if (ref1 > 0) { + mv1->as_int = nearest_second.as_int; + clamp_mv2(&mv1->as_mv, xd); + } + break; + + case ZEROMV: + mv0->as_int = 0; + if (ref1 > 0) + mv1->as_int = 0; + break; + + case NEWMV: + read_mv(r, &mv0->as_mv, &best_mv.as_mv, nmvc, &cm->counts.mv, allow_hp); + if (ref1 > 0) + read_mv(r, &mv1->as_mv, &best_mv_second.as_mv, nmvc, &cm->counts.mv, + allow_hp); + break; + default: + assert(!"Invalid inter mode value"); + } } } +static void read_inter_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, + int mi_row, int mi_col, vp9_reader *r) { + VP9_COMMON *const cm = &pbi->common; + MB_MODE_INFO *const mbmi = &mi->mbmi; + int inter_block; + + mbmi->mv[0].as_int = 0; + mbmi->mv[1].as_int = 0; + mbmi->segment_id = read_inter_segment_id(pbi, mi_row, mi_col, r); + mbmi->mb_skip_coeff = read_skip_coeff(pbi, mbmi->segment_id, r); + inter_block = read_is_inter_block(pbi, mbmi->segment_id, r); + mbmi->txfm_size = read_tx_size(pbi, cm->tx_mode, mbmi->sb_type, + !mbmi->mb_skip_coeff || !inter_block, r); + + if (inter_block) + read_inter_block_mode_info(pbi, mi, mi_row, mi_col, r); + else + read_intra_block_mode_info(pbi, mi, r); +} + static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) { int i; @@ -690,9 +684,9 @@ void vp9_read_mode_info(VP9D_COMP* pbi, int mi_row, int mi_col, vp9_reader *r) { int x, y; if (cm->frame_type == KEY_FRAME || cm->intra_only) - read_intra_mode_info(pbi, mi, mi_row, mi_col, r); + read_intra_frame_mode_info(pbi, 
mi, mi_row, mi_col, r); else - read_inter_mode_info(pbi, mi, mi_row, mi_col, r); + read_inter_frame_mode_info(pbi, mi, mi_row, mi_col, r); for (y = 0; y < y_mis; y++) for (x = !y; x < x_mis; x++) diff --git a/libvpx/vp9/decoder/vp9_decodemv.h b/libvpx/vp9/decoder/vp9_decodemv.h index 4073d9e..462d2e3 100644 --- a/libvpx/vp9/decoder/vp9_decodemv.h +++ b/libvpx/vp9/decoder/vp9_decodemv.h @@ -12,6 +12,7 @@ #define VP9_DECODER_VP9_DECODEMV_H_ #include "vp9/decoder/vp9_onyxd_int.h" +#include "vp9/decoder/vp9_dboolhuff.h" void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r); diff --git a/libvpx/vp9/decoder/vp9_decodframe.c b/libvpx/vp9/decoder/vp9_decodframe.c index ffec8ea..feb6024 100644 --- a/libvpx/vp9/decoder/vp9_decodframe.c +++ b/libvpx/vp9/decoder/vp9_decodframe.c @@ -31,8 +31,11 @@ #include "vp9/decoder/vp9_detokenize.h" #include "vp9/decoder/vp9_decodemv.h" #include "vp9/decoder/vp9_dsubexp.h" +#include "vp9/decoder/vp9_idct_blk.h" #include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/decoder/vp9_read_bit_buffer.h" +#include "vp9/decoder/vp9_thread.h" +#include "vp9/decoder/vp9_treereader.h" static int read_be32(const uint8_t *p) { return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]; @@ -59,17 +62,17 @@ static void read_tx_probs(struct tx_probs *tx_probs, vp9_reader *r) { int i, j; for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j) + for (j = 0; j < TX_SIZES - 3; ++j) if (vp9_read(r, VP9_MODE_UPDATE_PROB)) vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]); for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j) + for (j = 0; j < TX_SIZES - 2; ++j) if (vp9_read(r, VP9_MODE_UPDATE_PROB)) vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]); for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j) + for (j = 0; j < TX_SIZES - 1; ++j) if (vp9_read(r, VP9_MODE_UPDATE_PROB)) vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]); } @@ -138,8 +141,8 @@ static void decode_block_intra(int 
plane, int block, BLOCK_SIZE_TYPE bsize, const int mode = plane == 0 ? mi->mbmi.mode : mi->mbmi.uv_mode; - if (plane == 0 && mi->mbmi.sb_type < BLOCK_SIZE_SB8X8) { - assert(bsize == BLOCK_SIZE_SB8X8); + if (plane == 0 && mi->mbmi.sb_type < BLOCK_8X8) { + assert(bsize == BLOCK_8X8); b_mode = mi->bmi[raster_block].as_mode; } else { b_mode = mode; @@ -223,7 +226,7 @@ static void decode_modes_b(VP9D_COMP *pbi, int mi_row, int mi_col, vp9_reader *r, BLOCK_SIZE_TYPE bsize) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; - const int less8x8 = bsize < BLOCK_SIZE_SB8X8; + const int less8x8 = bsize < BLOCK_8X8; MB_MODE_INFO *mbmi; if (less8x8) @@ -234,12 +237,12 @@ static void decode_modes_b(VP9D_COMP *pbi, int mi_row, int mi_col, vp9_read_mode_info(pbi, mi_row, mi_col, r); if (less8x8) - bsize = BLOCK_SIZE_SB8X8; + bsize = BLOCK_8X8; // Has to be called after set_offsets mbmi = &xd->mode_info_context->mbmi; - if (mbmi->ref_frame[0] == INTRA_FRAME) { + if (!is_inter_block(mbmi)) { // Intra reconstruction decode_tokens(pbi, bsize, r); foreach_transformed_block(xd, bsize, decode_block_intra, xd); @@ -280,12 +283,12 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col, if (mi_row >= pc->mi_rows || mi_col >= pc->mi_cols) return; - if (bsize < BLOCK_SIZE_SB8X8) { + if (bsize < BLOCK_8X8) { if (xd->ab_index != 0) return; } else { int pl; - const int idx = check_bsize_coverage(pc, xd, mi_row, mi_col, bsize); + const int idx = check_bsize_coverage(pc, mi_row, mi_col, bsize); set_partition_seg_context(pc, xd, mi_row, mi_col); pl = partition_plane_context(xd, bsize); @@ -332,8 +335,8 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col, } // update partition context - if (bsize >= BLOCK_SIZE_SB8X8 && - (bsize == BLOCK_SIZE_SB8X8 || partition != PARTITION_SPLIT)) { + if (bsize >= BLOCK_8X8 && + (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) { set_partition_seg_context(pc, xd, mi_row, mi_col); update_partition_context(xd, 
subsize, bsize); } @@ -499,7 +502,7 @@ static INTERPOLATIONFILTERTYPE read_interp_filter_type( : vp9_rb_read_literal(rb, 2); } -static void read_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb, +static void read_frame_size(struct vp9_read_bit_buffer *rb, int *width, int *height) { const int w = vp9_rb_read_literal(rb, 16) + 1; const int h = vp9_rb_read_literal(rb, 16) + 1; @@ -507,12 +510,11 @@ static void read_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb, *height = h; } -static void setup_display_size(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) { - VP9_COMMON *const cm = &pbi->common; +static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { cm->display_width = cm->width; cm->display_height = cm->height; if (vp9_rb_read_bit(rb)) - read_frame_size(cm, rb, &cm->display_width, &cm->display_height); + read_frame_size(rb, &cm->display_width, &cm->display_height); } static void apply_frame_size(VP9D_COMP *pbi, int width, int height) { @@ -548,10 +550,9 @@ static void apply_frame_size(VP9D_COMP *pbi, int width, int height) { static void setup_frame_size(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) { - VP9_COMMON *const cm = &pbi->common; int width, height; - read_frame_size(cm, rb, &width, &height); - setup_display_size(pbi, rb); + read_frame_size(rb, &width, &height); + setup_display_size(&pbi->common, rb); apply_frame_size(pbi, width, height); } @@ -572,21 +573,29 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi, } if (!found) - read_frame_size(cm, rb, &width, &height); + read_frame_size(rb, &width, &height); if (!width || !height) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Referenced frame with invalid size"); - setup_display_size(pbi, rb); + setup_display_size(cm, rb); apply_frame_size(pbi, width, height); } static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) { + const int num_threads = pbi->oxcf.max_threads; VP9_COMMON *const pc = &pbi->common; int mi_row, mi_col; if 
(pbi->do_loopfilter_inline) { + if (num_threads > 1) { + LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; + lf_data->frame_buffer = &pbi->common.yv12_fb[pbi->common.new_fb_idx]; + lf_data->cm = pc; + lf_data->xd = pbi->mb; + lf_data->y_only = 0; + } vp9_loop_filter_frame_init(pc, &pbi->mb, pbi->mb.lf.filter_level); } @@ -597,21 +606,37 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) { vpx_memset(pc->left_seg_context, 0, sizeof(pc->left_seg_context)); for (mi_col = pc->cur_tile_mi_col_start; mi_col < pc->cur_tile_mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_modes_sb(pbi, mi_row, mi_col, r, BLOCK_SIZE_SB64X64); + decode_modes_sb(pbi, mi_row, mi_col, r, BLOCK_64X64); } if (pbi->do_loopfilter_inline) { - YV12_BUFFER_CONFIG *const fb = - &pbi->common.yv12_fb[pbi->common.new_fb_idx]; // delay the loopfilter by 1 macroblock row. const int lf_start = mi_row - MI_BLOCK_SIZE; if (lf_start < 0) continue; - vp9_loop_filter_rows(fb, pc, &pbi->mb, lf_start, mi_row, 0); + + if (num_threads > 1) { + LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; + + vp9_worker_sync(&pbi->lf_worker); + lf_data->start = lf_start; + lf_data->stop = mi_row; + pbi->lf_worker.hook = vp9_loop_filter_worker; + vp9_worker_launch(&pbi->lf_worker); + } else { + YV12_BUFFER_CONFIG *const fb = + &pbi->common.yv12_fb[pbi->common.new_fb_idx]; + vp9_loop_filter_rows(fb, pc, &pbi->mb, lf_start, mi_row, 0); + } } } if (pbi->do_loopfilter_inline) { YV12_BUFFER_CONFIG *const fb = &pbi->common.yv12_fb[pbi->common.new_fb_idx]; + if (num_threads > 1) { + // TODO(jzern): since the loop filter is delayed one mb row, this will be + // forced to wait for the last row scheduled in the for loop. 
+ vp9_worker_sync(&pbi->lf_worker); + } vp9_loop_filter_rows(fb, pc, &pbi->mb, mi_row - MI_BLOCK_SIZE, pc->mi_rows, 0); } @@ -994,7 +1019,6 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { if (!keyframe && !pc->intra_only) { vp9_adapt_mode_probs(pc); - vp9_adapt_mode_context(pc); vp9_adapt_mv_probs(pc, xd->allow_high_precision_mv); } } diff --git a/libvpx/vp9/decoder/vp9_detokenize.c b/libvpx/vp9/decoder/vp9_detokenize.c index 01c1db0..0021643 100644 --- a/libvpx/vp9/decoder/vp9_detokenize.c +++ b/libvpx/vp9/decoder/vp9_detokenize.c @@ -15,8 +15,10 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_seg_common.h" +#include "vp9/decoder/vp9_dboolhuff.h" #include "vp9/decoder/vp9_detokenize.h" #include "vp9/decoder/vp9_onyxd_int.h" +#include "vp9/decoder/vp9_treereader.h" #define EOB_CONTEXT_NODE 0 #define ZERO_CONTEXT_NODE 1 @@ -73,7 +75,7 @@ DECLARE_ALIGNED(16, extern const uint8_t, #define WRITE_COEF_CONTINUE(val, token) \ { \ qcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(r, val) * \ - dq[c > 0] / (1 + (txfm_size == TX_32X32)); \ + dq[c > 0] / (1 + (tx_size == TX_32X32)); \ INCREMENT_COUNT(token); \ c++; \ continue; \ @@ -88,33 +90,24 @@ DECLARE_ALIGNED(16, extern const uint8_t, static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, vp9_reader *r, int block_idx, PLANE_TYPE type, int seg_eob, int16_t *qcoeff_ptr, - TX_SIZE txfm_size, const int16_t *dq, + TX_SIZE tx_size, const int16_t *dq, ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L) { FRAME_CONTEXT *const fc = &cm->fc; FRAME_COUNTS *const counts = &cm->counts; ENTROPY_CONTEXT above_ec, left_ec; - int pt, c = 0; - int band; - vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES]; + const int ref = is_inter_block(&xd->mode_info_context->mbmi); + int band, pt, c = 0; + vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES] = + fc->coef_probs[tx_size][type][ref]; vp9_prob coef_probs_full[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; - uint8_t 
load_map[COEF_BANDS][PREV_COEF_CONTEXTS] = { - {0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0}, - }; - + uint8_t load_map[COEF_BANDS][PREV_COEF_CONTEXTS] = { { 0 } }; vp9_prob *prob; - vp9_coeff_count_model *coef_counts; - const int ref = xd->mode_info_context->mbmi.ref_frame[0] != INTRA_FRAME; + vp9_coeff_count_model *coef_counts = counts->coef[tx_size]; const int16_t *scan, *nb; uint8_t token_cache[1024]; const uint8_t * band_translate; - coef_probs = fc->coef_probs[txfm_size][type][ref]; - coef_counts = counts->coef[txfm_size]; - switch (txfm_size) { + + switch (tx_size) { default: case TX_4X4: { scan = get_scan_4x4(get_tx_type_4x4(type, xd, block_idx)); @@ -125,22 +118,22 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, } case TX_8X8: { scan = get_scan_8x8(get_tx_type_8x8(type, xd)); - above_ec = (A[0] + A[1]) != 0; - left_ec = (L[0] + L[1]) != 0; + above_ec = !!*(uint16_t *)A; + left_ec = !!*(uint16_t *)L; band_translate = vp9_coefband_trans_8x8plus; break; } case TX_16X16: { scan = get_scan_16x16(get_tx_type_16x16(type, xd)); - above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; - left_ec = (L[0] + L[1] + L[2] + L[3]) != 0; + above_ec = !!*(uint32_t *)A; + left_ec = !!*(uint32_t *)L; band_translate = vp9_coefband_trans_8x8plus; break; } case TX_32X32: scan = vp9_default_scan_32x32; - above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0; - left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0; + above_ec = !!*(uint64_t *)A; + left_ec = !!*(uint64_t *)L; band_translate = vp9_coefband_trans_8x8plus; break; } @@ -157,7 +150,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, pt = get_coef_context(nb, token_cache, c); band = get_coef_band(band_translate, c); prob = coef_probs[band][pt]; - counts->eob_branch[txfm_size][type][ref][band][pt]++; + counts->eob_branch[tx_size][type][ref][band][pt]++; if (!vp9_read(r, 
prob[EOB_CONTEXT_NODE])) break; @@ -276,7 +269,7 @@ static void decode_block(int plane, int block, const int mod = bw - ss_tx_size - pd->subsampling_x; const int aoff = (off & ((1 << mod) - 1)) << ss_tx_size; const int loff = (off >> mod) << ss_tx_size; - + const int tx_size_in_blocks = 1 << ss_tx_size; ENTROPY_CONTEXT *A = pd->above_context + aoff; ENTROPY_CONTEXT *L = pd->left_context + loff; const int eob = decode_coefs(&arg->pbi->common, xd, arg->r, block, @@ -285,10 +278,11 @@ static void decode_block(int plane, int block, ss_tx_size, pd->dequant, A, L); if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) { - set_contexts_on_border(xd, bsize, plane, ss_tx_size, eob, aoff, loff, A, L); + set_contexts_on_border(xd, bsize, plane, tx_size_in_blocks, eob, aoff, loff, + A, L); } else { int pt; - for (pt = 0; pt < (1 << ss_tx_size); pt++) + for (pt = 0; pt < tx_size_in_blocks; pt++) A[pt] = L[pt] = eob > 0; } pd->eobs[block] = eob; diff --git a/libvpx/vp9/decoder/vp9_detokenize.h b/libvpx/vp9/decoder/vp9_detokenize.h index d46b596..f98fe8d 100644 --- a/libvpx/vp9/decoder/vp9_detokenize.h +++ b/libvpx/vp9/decoder/vp9_detokenize.h @@ -13,6 +13,7 @@ #define VP9_DECODER_VP9_DETOKENIZE_H_ #include "vp9/decoder/vp9_onyxd_int.h" +#include "vp9/decoder/vp9_dboolhuff.h" int vp9_decode_tokens(VP9D_COMP* pbi, vp9_reader *r, BLOCK_SIZE_TYPE bsize); diff --git a/libvpx/vp9/decoder/vp9_idct_blk.c b/libvpx/vp9/decoder/vp9_idct_blk.c index 0217919..395e636 100644 --- a/libvpx/vp9/decoder/vp9_idct_blk.c +++ b/libvpx/vp9/decoder/vp9_idct_blk.c @@ -93,15 +93,11 @@ void vp9_idct_add_8x8_c(int16_t *input, uint8_t *dest, int stride, int eob) { if (eob) { if (eob == 1) { // DC only DCT coefficient - int16_t in = input[0]; - int16_t out; - - // Note: the idct1 will need to be modified accordingly whenever - // vp9_short_idct8x8_c() is modified. 
- vp9_short_idct1_8x8_c(&in, &out); + vp9_short_idct8x8_1_add(input, dest, stride); input[0] = 0; - - vp9_add_constant_residual_8x8(out, dest, stride); + } else if (eob <= 10) { + vp9_short_idct10_8x8_add(input, dest, stride); + vpx_memset(input, 0, 128); } else { vp9_short_idct8x8_add(input, dest, stride); vpx_memset(input, 0, 128); @@ -127,14 +123,11 @@ void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) { if (eob) { if (eob == 1) { /* DC only DCT coefficient. */ - int16_t in = input[0]; - int16_t out; - /* Note: the idct1 will need to be modified accordingly whenever - * vp9_short_idct16x16() is modified. */ - vp9_short_idct1_16x16_c(&in, &out); + vp9_short_idct16x16_1_add(input, dest, stride); input[0] = 0; - - vp9_add_constant_residual_16x16(out, dest, stride); + } else if (eob <= 10) { + vp9_short_idct10_16x16_add(input, dest, stride); + vpx_memset(input, 0, 512); } else { vp9_short_idct16x16_add(input, dest, stride); vpx_memset(input, 0, 512); diff --git a/libvpx/vp9/decoder/vp9_onyxd_if.c b/libvpx/vp9/decoder/vp9_onyxd_if.c index cb72920..5a01dd7 100644 --- a/libvpx/vp9/decoder/vp9_onyxd_if.c +++ b/libvpx/vp9/decoder/vp9_onyxd_if.c @@ -8,9 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - -#include <stdio.h> #include <assert.h> +#include <limits.h> +#include <stdio.h> #include "vp9/common/vp9_onyxc_int.h" #if CONFIG_POSTPROC @@ -114,7 +114,7 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) { if (!pbi) return NULL; - vpx_memset(pbi, 0, sizeof(VP9D_COMP)); + vp9_zero(*pbi); if (setjmp(pbi->common.error.jmp)) { pbi->common.error.setjmp = 0; @@ -141,6 +141,16 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) { pbi->common.error.setjmp = 0; pbi->decoded_key_frame = 0; + if (pbi->oxcf.max_threads > 1) { + vp9_worker_init(&pbi->lf_worker); + pbi->lf_worker.data1 = vpx_malloc(sizeof(LFWorkerData)); + pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; + if (pbi->lf_worker.data1 == NULL || !vp9_worker_reset(&pbi->lf_worker)) { + vp9_remove_decompressor(pbi); + return NULL; + } + } + return pbi; } @@ -154,6 +164,8 @@ void vp9_remove_decompressor(VP9D_PTR ptr) { vpx_free(pbi->common.last_frame_seg_map); vp9_remove_common(&pbi->common); + vp9_worker_end(&pbi->lf_worker); + vpx_free(pbi->lf_worker.data1); vpx_free(pbi); } diff --git a/libvpx/vp9/decoder/vp9_onyxd_int.h b/libvpx/vp9/decoder/vp9_onyxd_int.h index 4760066..a051971 100644 --- a/libvpx/vp9/decoder/vp9_onyxd_int.h +++ b/libvpx/vp9/decoder/vp9_onyxd_int.h @@ -14,10 +14,8 @@ #include "./vpx_config.h" #include "vp9/common/vp9_onyxc_int.h" - -#include "vp9/decoder/vp9_idct_blk.h" #include "vp9/decoder/vp9_onyxd.h" -#include "vp9/decoder/vp9_treereader.h" +#include "vp9/decoder/vp9_thread.h" typedef struct VP9Decompressor { DECLARE_ALIGNED(16, MACROBLOCKD, mb); @@ -40,6 +38,7 @@ typedef struct VP9Decompressor { int initial_height; int do_loopfilter_inline; // apply loopfilter to available rows immediately + VP9Worker lf_worker; } VP9D_COMP; #endif // VP9_DECODER_VP9_TREEREADER_H_ diff --git a/libvpx/vp9/decoder/vp9_thread.c b/libvpx/vp9/decoder/vp9_thread.c new file mode 100644 index 0000000..dc3b681 --- /dev/null +++ b/libvpx/vp9/decoder/vp9_thread.c @@ -0,0 +1,248 @@ +// 
Copyright 2013 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Multi-threaded worker +// +// Original source: +// http://git.chromium.org/webm/libwebp.git +// 100644 blob eff8f2a8c20095aade3c292b0e9292dac6cb3587 src/utils/thread.c + + +#include <assert.h> +#include <string.h> // for memset() +#include "./vp9_thread.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +#if CONFIG_MULTITHREAD + +#if defined(_WIN32) + +//------------------------------------------------------------------------------ +// simplistic pthread emulation layer + +#include <process.h> + +// _beginthreadex requires __stdcall +#define THREADFN unsigned int __stdcall +#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val) + +static int pthread_create(pthread_t* const thread, const void* attr, + unsigned int (__stdcall *start)(void*), void* arg) { + (void)attr; + *thread = (pthread_t)_beginthreadex(NULL, /* void *security */ + 0, /* unsigned stack_size */ + start, + arg, + 0, /* unsigned initflag */ + NULL); /* unsigned *thrdaddr */ + if (*thread == NULL) return 1; + SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL); + return 0; +} + +static int pthread_join(pthread_t thread, void** value_ptr) { + (void)value_ptr; + return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 || + CloseHandle(thread) == 0); +} + +// Mutex +static int pthread_mutex_init(pthread_mutex_t* const mutex, void* mutexattr) { + (void)mutexattr; + InitializeCriticalSection(mutex); + return 0; +} + +static int pthread_mutex_lock(pthread_mutex_t* const mutex) { + 
EnterCriticalSection(mutex); + return 0; +} + +static int pthread_mutex_unlock(pthread_mutex_t* const mutex) { + LeaveCriticalSection(mutex); + return 0; +} + +static int pthread_mutex_destroy(pthread_mutex_t* const mutex) { + DeleteCriticalSection(mutex); + return 0; +} + +// Condition +static int pthread_cond_destroy(pthread_cond_t* const condition) { + int ok = 1; + ok &= (CloseHandle(condition->waiting_sem_) != 0); + ok &= (CloseHandle(condition->received_sem_) != 0); + ok &= (CloseHandle(condition->signal_event_) != 0); + return !ok; +} + +static int pthread_cond_init(pthread_cond_t* const condition, void* cond_attr) { + (void)cond_attr; + condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL); + condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL); + condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL); + if (condition->waiting_sem_ == NULL || + condition->received_sem_ == NULL || + condition->signal_event_ == NULL) { + pthread_cond_destroy(condition); + return 1; + } + return 0; +} + +static int pthread_cond_signal(pthread_cond_t* const condition) { + int ok = 1; + if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) { + // a thread is waiting in pthread_cond_wait: allow it to be notified + ok = SetEvent(condition->signal_event_); + // wait until the event is consumed so the signaler cannot consume + // the event via its own pthread_cond_wait. 
+ ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) != + WAIT_OBJECT_0); + } + return !ok; +} + +static int pthread_cond_wait(pthread_cond_t* const condition, + pthread_mutex_t* const mutex) { + int ok; + // note that there is a consumer available so the signal isn't dropped in + // pthread_cond_signal + if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL)) + return 1; + // now unlock the mutex so pthread_cond_signal may be issued + pthread_mutex_unlock(mutex); + ok = (WaitForSingleObject(condition->signal_event_, INFINITE) == + WAIT_OBJECT_0); + ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL); + pthread_mutex_lock(mutex); + return !ok; +} + +#else // _WIN32 +# define THREADFN void* +# define THREAD_RETURN(val) val +#endif + +//------------------------------------------------------------------------------ + +static THREADFN thread_loop(void *ptr) { // thread loop + VP9Worker* const worker = (VP9Worker*)ptr; + int done = 0; + while (!done) { + pthread_mutex_lock(&worker->mutex_); + while (worker->status_ == OK) { // wait in idling mode + pthread_cond_wait(&worker->condition_, &worker->mutex_); + } + if (worker->status_ == WORK) { + if (worker->hook) { + worker->had_error |= !worker->hook(worker->data1, worker->data2); + } + worker->status_ = OK; + } else if (worker->status_ == NOT_OK) { // finish the worker + done = 1; + } + // signal to the main thread that we're done (for Sync()) + pthread_cond_signal(&worker->condition_); + pthread_mutex_unlock(&worker->mutex_); + } + return THREAD_RETURN(NULL); // Thread is finished +} + +// main thread state control +static void change_state(VP9Worker* const worker, + VP9WorkerStatus new_status) { + // no-op when attempting to change state on a thread that didn't come up + if (worker->status_ < OK) return; + + pthread_mutex_lock(&worker->mutex_); + // wait for the worker to finish + while (worker->status_ != OK) { + pthread_cond_wait(&worker->condition_, &worker->mutex_); + } + // assign new status and 
release the working thread if needed + if (new_status != OK) { + worker->status_ = new_status; + pthread_cond_signal(&worker->condition_); + } + pthread_mutex_unlock(&worker->mutex_); +} + +#endif + +//------------------------------------------------------------------------------ + +void vp9_worker_init(VP9Worker* const worker) { + memset(worker, 0, sizeof(*worker)); + worker->status_ = NOT_OK; +} + +int vp9_worker_sync(VP9Worker* const worker) { +#if CONFIG_MULTITHREAD + change_state(worker, OK); +#endif + assert(worker->status_ <= OK); + return !worker->had_error; +} + +int vp9_worker_reset(VP9Worker* const worker) { + int ok = 1; + worker->had_error = 0; + if (worker->status_ < OK) { +#if CONFIG_MULTITHREAD + if (pthread_mutex_init(&worker->mutex_, NULL) || + pthread_cond_init(&worker->condition_, NULL)) { + return 0; + } + pthread_mutex_lock(&worker->mutex_); + ok = !pthread_create(&worker->thread_, NULL, thread_loop, worker); + if (ok) worker->status_ = OK; + pthread_mutex_unlock(&worker->mutex_); +#else + worker->status_ = OK; +#endif + } else if (worker->status_ > OK) { + ok = vp9_worker_sync(worker); + } + assert(!ok || (worker->status_ == OK)); + return ok; +} + +void vp9_worker_launch(VP9Worker* const worker) { +#if CONFIG_MULTITHREAD + change_state(worker, WORK); +#else + if (worker->hook) + worker->had_error |= !worker->hook(worker->data1, worker->data2); +#endif +} + +void vp9_worker_end(VP9Worker* const worker) { + if (worker->status_ >= OK) { +#if CONFIG_MULTITHREAD + change_state(worker, NOT_OK); + pthread_join(worker->thread_, NULL); + pthread_mutex_destroy(&worker->mutex_); + pthread_cond_destroy(&worker->condition_); +#else + worker->status_ = NOT_OK; +#endif + } + assert(worker->status_ == NOT_OK); +} + +//------------------------------------------------------------------------------ + +#if defined(__cplusplus) || defined(c_plusplus) +} // extern "C" +#endif diff --git a/libvpx/vp9/decoder/vp9_thread.h b/libvpx/vp9/decoder/vp9_thread.h new file 
mode 100644 index 0000000..a8f7e04 --- /dev/null +++ b/libvpx/vp9/decoder/vp9_thread.h @@ -0,0 +1,93 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Multi-threaded worker +// +// Original source: +// http://git.chromium.org/webm/libwebp.git +// 100644 blob 13a61a4c84194c3374080cbf03d881d3cd6af40d src/utils/thread.h + + +#ifndef VP9_DECODER_VP9_THREAD_H_ +#define VP9_DECODER_VP9_THREAD_H_ + +#include "vpx_config.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +#if CONFIG_MULTITHREAD + +#if defined(_WIN32) + +#include <windows.h> +typedef HANDLE pthread_t; +typedef CRITICAL_SECTION pthread_mutex_t; +typedef struct { + HANDLE waiting_sem_; + HANDLE received_sem_; + HANDLE signal_event_; +} pthread_cond_t; + +#else + +#include <pthread.h> + +#endif /* _WIN32 */ +#endif /* CONFIG_MULTITHREAD */ + +// State of the worker thread object +typedef enum { + NOT_OK = 0, // object is unusable + OK, // ready to work + WORK // busy finishing the current task +} VP9WorkerStatus; + +// Function to be called by the worker thread. Takes two opaque pointers as +// arguments (data1 and data2), and should return false in case of error. 
+typedef int (*VP9WorkerHook)(void*, void*); + +// Synchronize object used to launch job in the worker thread +typedef struct { +#if CONFIG_MULTITHREAD + pthread_mutex_t mutex_; + pthread_cond_t condition_; + pthread_t thread_; +#endif + VP9WorkerStatus status_; + VP9WorkerHook hook; // hook to call + void* data1; // first argument passed to 'hook' + void* data2; // second argument passed to 'hook' + int had_error; // return value of the last call to 'hook' +} VP9Worker; + +// Must be called first, before any other method. +void vp9_worker_init(VP9Worker* const worker); +// Must be called to initialize the object and spawn the thread. Re-entrant. +// Will potentially launch the thread. Returns false in case of error. +int vp9_worker_reset(VP9Worker* const worker); +// Makes sure the previous work is finished. Returns true if worker->had_error +// was not set and no error condition was triggered by the working thread. +int vp9_worker_sync(VP9Worker* const worker); +// Triggers the thread to call hook() with data1 and data2 argument. These +// hook/data1/data2 can be changed at any time before calling this function, +// but not be changed afterward until the next call to vp9_worker_sync(). +void vp9_worker_launch(VP9Worker* const worker); +// Kill the thread and terminate the object. To use the object again, one +// must call vp9_worker_reset() again. 
+void vp9_worker_end(VP9Worker* const worker); + +//------------------------------------------------------------------------------ + +#if defined(__cplusplus) || defined(c_plusplus) +} // extern "C" +#endif + +#endif /* VP9_DECODER_VP9_THREAD_H_ */ diff --git a/libvpx/vp9/decoder/vp9_treereader.h b/libvpx/vp9/decoder/vp9_treereader.h index 4535688..710cc4c 100644 --- a/libvpx/vp9/decoder/vp9_treereader.h +++ b/libvpx/vp9/decoder/vp9_treereader.h @@ -15,7 +15,6 @@ #include "vp9/common/vp9_treecoder.h" #include "vp9/decoder/vp9_dboolhuff.h" -#define vp9_read_prob(r) ((vp9_prob)vp9_read_literal(r, 8)) #define vp9_read_and_apply_sign(r, value) (vp9_read_bit(r) ? -(value) : (value)) // Intent of tree data structure is to make decoding trivial. |