diff options
author | hkuang <hkuang@google.com> | 2013-11-13 18:48:14 -0800 |
---|---|---|
committer | Android Git Automerger <android-git-automerger@android.com> | 2013-11-13 18:48:14 -0800 |
commit | a1b7a7bb1ccf3f479bbca69a52a76eb05789dbaf (patch) | |
tree | 098e8b4772cb24cf53e3430f5a28ea1b80e875bd /libvpx/vp9/decoder | |
parent | 60f286339b17c89a358efbc94fb18d322faf4552 (diff) | |
parent | 5ae7ac49f08a179e4f054d99fcfc9dce78d26e58 (diff) | |
download | android_external_libvpx-a1b7a7bb1ccf3f479bbca69a52a76eb05789dbaf.tar.gz android_external_libvpx-a1b7a7bb1ccf3f479bbca69a52a76eb05789dbaf.tar.bz2 android_external_libvpx-a1b7a7bb1ccf3f479bbca69a52a76eb05789dbaf.zip |
am 5ae7ac49: Roll latest libvpx into Android.
* commit '5ae7ac49f08a179e4f054d99fcfc9dce78d26e58':
Roll latest libvpx into Android.
Diffstat (limited to 'libvpx/vp9/decoder')
-rw-r--r-- | libvpx/vp9/decoder/arm/neon/vp9_add_constant_residual_neon.asm | 230 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_dboolhuff.h | 2 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_decodemv.c | 513 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_decodemv.h | 6 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_decodframe.c | 1061 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_detokenize.c | 115 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_detokenize.h | 5 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_dsubexp.c | 15 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_idct_blk.c | 152 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_idct_blk.h | 30 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_onyxd.h | 8 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_onyxd_if.c | 104 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_onyxd_int.h | 16 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_read_bit_buffer.h | 6 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_thread.c | 17 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_thread.h | 13 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_treereader.h | 3 | ||||
-rw-r--r-- | libvpx/vp9/decoder/x86/vp9_dequantize_sse2.c | 220 |
18 files changed, 1070 insertions, 1446 deletions
diff --git a/libvpx/vp9/decoder/arm/neon/vp9_add_constant_residual_neon.asm b/libvpx/vp9/decoder/arm/neon/vp9_add_constant_residual_neon.asm deleted file mode 100644 index 174e747..0000000 --- a/libvpx/vp9/decoder/arm/neon/vp9_add_constant_residual_neon.asm +++ /dev/null @@ -1,230 +0,0 @@ -; -; Copyright (c) 2013 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - EXPORT |vp9_add_constant_residual_8x8_neon| - EXPORT |vp9_add_constant_residual_16x16_neon| - EXPORT |vp9_add_constant_residual_32x32_neon| - ARM - - AREA ||.text||, CODE, READONLY, ALIGN=2 - - MACRO - LD_16x8 $src, $stride - vld1.8 {q8}, [$src], $stride - vld1.8 {q9}, [$src], $stride - vld1.8 {q10}, [$src], $stride - vld1.8 {q11}, [$src], $stride - vld1.8 {q12}, [$src], $stride - vld1.8 {q13}, [$src], $stride - vld1.8 {q14}, [$src], $stride - vld1.8 {q15}, [$src], $stride - MEND - - MACRO - ADD_DIFF_16x8 $diff - vqadd.u8 q8, q8, $diff - vqadd.u8 q9, q9, $diff - vqadd.u8 q10, q10, $diff - vqadd.u8 q11, q11, $diff - vqadd.u8 q12, q12, $diff - vqadd.u8 q13, q13, $diff - vqadd.u8 q14, q14, $diff - vqadd.u8 q15, q15, $diff - MEND - - MACRO - SUB_DIFF_16x8 $diff - vqsub.u8 q8, q8, $diff - vqsub.u8 q9, q9, $diff - vqsub.u8 q10, q10, $diff - vqsub.u8 q11, q11, $diff - vqsub.u8 q12, q12, $diff - vqsub.u8 q13, q13, $diff - vqsub.u8 q14, q14, $diff - vqsub.u8 q15, q15, $diff - MEND - - MACRO - ST_16x8 $dst, $stride - vst1.8 {q8}, [$dst], $stride - vst1.8 {q9}, [$dst], $stride - vst1.8 {q10}, [$dst], $stride - vst1.8 {q11}, [$dst], $stride - vst1.8 {q12}, [$dst], $stride - vst1.8 {q13}, [$dst], $stride - vst1.8 {q14}, [$dst], $stride - vst1.8 {q15}, [$dst], $stride - MEND - -; void add_constant_residual(const int16_t diff, uint8_t *dest, int stride, -; int width, int height) { -; int r, c; -; -; for (r = 0; r < height; r++) { -; for (c = 0; c < width; c++) -; dest[c] = clip_pixel(diff + dest[c]); -; -; dest += stride; -; } -;} -;void vp9_add_constant_residual_8x8_c(const int16_t diff, uint8_t *dest, -; int stride) { -; add_constant_residual(diff, dest, stride, 8, 8); -;} -; r0 : const int16_t diff -; r1 : const uint8_t *dest -; r2 : int stride -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -|vp9_add_constant_residual_8x8_neon| PROC - mov r3, r1 ; r3: save dest to r3 - vld1.8 {d0}, [r1], r2 - vld1.8 {d1}, [r1], r2 - vld1.8 {d2}, [r1], r2 - vld1.8 {d3}, [r1], r2 - vld1.8 {d4}, [r1], r2 - vld1.8 {d5}, [r1], r2 - vld1.8 {d6}, [r1], r2 - vld1.8 {d7}, [r1], r2 - cmp r0, #0 - bge DIFF_POSITIVE_8x8 - -DIFF_NEGATIVE_8x8 ; diff < 0 - neg r0, r0 - usat r0, #8, r0 - vdup.u8 q8, r0 - - vqsub.u8 q0, q0, q8 - vqsub.u8 q1, q1, q8 - vqsub.u8 q2, q2, q8 - vqsub.u8 q3, q3, q8 - b DIFF_SAVE_8x8 - -DIFF_POSITIVE_8x8 ; diff >= 0 - usat r0, #8, r0 - vdup.u8 q8, r0 - - vqadd.u8 q0, q0, q8 - vqadd.u8 q1, q1, q8 - vqadd.u8 q2, q2, q8 - vqadd.u8 q3, q3, q8 - -DIFF_SAVE_8x8 - vst1.8 {d0}, [r3], r2 - vst1.8 {d1}, [r3], r2 - vst1.8 {d2}, [r3], r2 - vst1.8 {d3}, [r3], r2 - vst1.8 {d4}, [r3], r2 - vst1.8 {d5}, [r3], r2 - vst1.8 {d6}, [r3], r2 - vst1.8 {d7}, [r3], r2 - - bx lr - ENDP - -;void vp9_add_constant_residual_16x16_c(const int16_t diff, uint8_t *dest, -; int stride) { -; add_constant_residual(diff, dest, stride, 16, 16); -;} -; r0 : const int16_t diff -; r1 : const uint8_t *dest -; r2 : int stride -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -|vp9_add_constant_residual_16x16_neon| PROC - mov r3, r1 - LD_16x8 r1, r2 - cmp r0, #0 - bge DIFF_POSITIVE_16x16 - -|DIFF_NEGATIVE_16x16| - neg r0, r0 - usat r0, #8, r0 - vdup.u8 q0, r0 - - SUB_DIFF_16x8 q0 - ST_16x8 r3, r2 - LD_16x8 r1, r2 - SUB_DIFF_16x8 q0 - b DIFF_SAVE_16x16 - -|DIFF_POSITIVE_16x16| - usat r0, #8, r0 - vdup.u8 q0, r0 - - ADD_DIFF_16x8 q0 - ST_16x8 r3, r2 - LD_16x8 r1, r2 - ADD_DIFF_16x8 q0 - -|DIFF_SAVE_16x16| - ST_16x8 r3, r2 - bx lr - ENDP - -;void vp9_add_constant_residual_32x32_c(const int16_t diff, uint8_t *dest, -; int stride) { -; add_constant_residual(diff, dest, stride, 32, 32); -;} -; r0 : const int16_t diff -; r1 : const uint8_t *dest -; r2 : int stride -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -|vp9_add_constant_residual_32x32_neon| PROC - push {r4,lr} - pld [r1] - mov r3, r1 - add r4, r1, #16 ; r4 dest + 16 for second loop - cmp r0, #0 - bge DIFF_POSITIVE_32x32 - -|DIFF_NEGATIVE_32x32| - neg r0, r0 - usat r0, #8, r0 - vdup.u8 q0, r0 - mov r0, #4 - -|DIFF_NEGATIVE_32x32_LOOP| - sub r0, #1 - LD_16x8 r1, r2 - SUB_DIFF_16x8 q0 - ST_16x8 r3, r2 - - LD_16x8 r1, r2 - SUB_DIFF_16x8 q0 - ST_16x8 r3, r2 - cmp r0, #2 - moveq r1, r4 - moveq r3, r4 - cmp r0, #0 - bne DIFF_NEGATIVE_32x32_LOOP - pop {r4,pc} - -|DIFF_POSITIVE_32x32| - usat r0, #8, r0 - vdup.u8 q0, r0 - mov r0, #4 - -|DIFF_POSITIVE_32x32_LOOP| - sub r0, #1 - LD_16x8 r1, r2 - ADD_DIFF_16x8 q0 - ST_16x8 r3, r2 - - LD_16x8 r1, r2 - ADD_DIFF_16x8 q0 - ST_16x8 r3, r2 - cmp r0, #2 - moveq r1, r4 - moveq r3, r4 - cmp r0, #0 - bne DIFF_POSITIVE_32x32_LOOP - pop {r4,pc} - ENDP - - END diff --git a/libvpx/vp9/decoder/vp9_dboolhuff.h b/libvpx/vp9/decoder/vp9_dboolhuff.h index c864516..fd8e74c 100644 --- a/libvpx/vp9/decoder/vp9_dboolhuff.h +++ b/libvpx/vp9/decoder/vp9_dboolhuff.h @@ -44,7 +44,7 @@ static int vp9_read(vp9_reader *br, int probability) { VP9_BD_VALUE bigsplit; int count; unsigned int range; - unsigned int split = 1 + (((br->range - 1) * probability) >> 8); + unsigned int split = ((br->range * probability) + (256 - probability)) >> 8; if (br->count < 0) vp9_reader_fill(br); diff --git a/libvpx/vp9/decoder/vp9_decodemv.c b/libvpx/vp9/decoder/vp9_decodemv.c index 84a29b1..9792d2c 100644 --- a/libvpx/vp9/decoder/vp9_decodemv.c +++ b/libvpx/vp9/decoder/vp9_decodemv.c @@ -23,18 +23,36 @@ #include "vp9/decoder/vp9_decodemv.h" #include "vp9/decoder/vp9_decodframe.h" #include "vp9/decoder/vp9_onyxd_int.h" -#include "vp9/decoder/vp9_dsubexp.h" #include "vp9/decoder/vp9_treereader.h" static MB_PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) { return (MB_PREDICTION_MODE)treed_read(r, vp9_intra_mode_tree, p); } +static MB_PREDICTION_MODE read_intra_mode_y(VP9_COMMON *cm, vp9_reader *r, + int size_group) { + const MB_PREDICTION_MODE y_mode = read_intra_mode(r, + cm->fc.y_mode_prob[size_group]); + if (!cm->frame_parallel_decoding_mode) + ++cm->counts.y_mode[size_group][y_mode]; + return y_mode; +} + +static MB_PREDICTION_MODE read_intra_mode_uv(VP9_COMMON *cm, vp9_reader *r, + MB_PREDICTION_MODE y_mode) { + const MB_PREDICTION_MODE uv_mode = read_intra_mode(r, + cm->fc.uv_mode_prob[y_mode]); + if (!cm->frame_parallel_decoding_mode) + ++cm->counts.uv_mode[y_mode][uv_mode]; + return uv_mode; +} + static MB_PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, vp9_reader *r, uint8_t context) { - MB_PREDICTION_MODE mode = treed_read(r, vp9_inter_mode_tree, - cm->fc.inter_mode_probs[context]); - ++cm->counts.inter_mode[context][inter_mode_offset(mode)]; + const MB_PREDICTION_MODE mode = treed_read(r, vp9_inter_mode_tree, + cm->fc.inter_mode_probs[context]); + if (!cm->frame_parallel_decoding_mode) + ++cm->counts.inter_mode[context][inter_mode_offset(mode)]; return mode; } @@ -53,33 +71,28 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, tx_size += vp9_read(r, tx_probs[2]); } - update_tx_counts(bsize, context, tx_size, &cm->counts.tx); + if (!cm->frame_parallel_decoding_mode) + ++get_tx_counts(bsize, context, &cm->counts.tx)[tx_size]; return tx_size; } -static TX_SIZE read_tx_size(VP9D_COMP *pbi, TX_MODE tx_mode, - BLOCK_SIZE bsize, int allow_select, +static TX_SIZE read_tx_size(VP9_COMMON *const cm, MACROBLOCKD *const xd, + TX_MODE tx_mode, BLOCK_SIZE bsize, int allow_select, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - - if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8) + if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8) { return read_selected_tx_size(cm, xd, bsize, r); - else if (tx_mode >= ALLOW_32X32 && bsize >= BLOCK_32X32) - return TX_32X32; - else if (tx_mode >= ALLOW_16X16 && bsize >= BLOCK_16X16) - return TX_16X16; - else if (tx_mode >= ALLOW_8X8 && bsize >= BLOCK_8X8) - return TX_8X8; - else - return TX_4X4; + } else { + const TX_SIZE max_tx_size_block = max_txsize_lookup[bsize]; + const TX_SIZE max_tx_size_txmode = tx_mode_to_biggest_tx_size[tx_mode]; + return MIN(max_tx_size_block, max_tx_size_txmode); + } } static void set_segment_id(VP9_COMMON *cm, BLOCK_SIZE bsize, int mi_row, int mi_col, int segment_id) { const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int bw = 1 << mi_width_log2(bsize); - const int bh = 1 << mi_height_log2(bsize); + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; const int xmis = MIN(cm->mi_cols - mi_col, bw); const int ymis = MIN(cm->mi_rows - mi_row, bh); int x, y; @@ -91,11 +104,11 @@ static void set_segment_id(VP9_COMMON *cm, BLOCK_SIZE bsize, cm->last_frame_seg_map[mi_offset + y * cm->mi_cols + x] = segment_id; } -static int read_intra_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col, +static int read_intra_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, + int mi_row, int mi_col, vp9_reader *r) { - MACROBLOCKD *const xd = &pbi->mb; - struct segmentation *const seg = &pbi->common.seg; - const BLOCK_SIZE bsize = xd->this_mi->mbmi.sb_type; + struct segmentation *const seg = &cm->seg; + const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type; int segment_id; if (!seg->enabled) @@ -105,16 +118,14 @@ static int read_intra_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col, return 0; segment_id = read_segment_id(r, seg); - set_segment_id(&pbi->common, bsize, mi_row, mi_col, segment_id); + set_segment_id(cm, bsize, mi_row, mi_col, segment_id); return segment_id; } -static int read_inter_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col, - vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; +static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, + int mi_row, int mi_col, vp9_reader *r) { struct segmentation *const seg = &cm->seg; - const BLOCK_SIZE bsize = xd->this_mi->mbmi.sb_type; + const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type; int pred_segment_id, segment_id; if (!seg->enabled) @@ -138,37 +149,37 @@ static int read_inter_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col, return segment_id; } -static uint8_t read_skip_coeff(VP9D_COMP *pbi, int segment_id, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - int skip_coeff = vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP); - if (!skip_coeff) { +static int read_skip_coeff(VP9_COMMON *cm, const MACROBLOCKD *xd, + int segment_id, vp9_reader *r) { + if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { + return 1; + } else { const int ctx = vp9_get_pred_context_mbskip(xd); - skip_coeff = vp9_read(r, vp9_get_pred_prob_mbskip(cm, xd)); - cm->counts.mbskip[ctx][skip_coeff]++; + const int skip = vp9_read(r, cm->fc.mbskip_probs[ctx]); + if (!cm->frame_parallel_decoding_mode) + ++cm->counts.mbskip[ctx][skip]; + return skip; } - return skip_coeff; } -static void read_intra_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *m, +static void read_intra_frame_mode_info(VP9_COMMON *const cm, + MACROBLOCKD *const xd, + MODE_INFO *const m, int mi_row, int mi_col, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; MB_MODE_INFO *const mbmi = &m->mbmi; const BLOCK_SIZE bsize = mbmi->sb_type; const MODE_INFO *above_mi = xd->mi_8x8[-cm->mode_info_stride]; - const MODE_INFO *left_mi = xd->mi_8x8[-1]; + const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL; - mbmi->segment_id = read_intra_segment_id(pbi, mi_row, mi_col, r); - mbmi->skip_coeff = read_skip_coeff(pbi, mbmi->segment_id, r); - mbmi->tx_size = read_tx_size(pbi, cm->tx_mode, bsize, 1, r); + mbmi->segment_id = read_intra_segment_id(cm, xd, mi_row, mi_col, r); + mbmi->skip_coeff = read_skip_coeff(cm, xd, mbmi->segment_id, r); + mbmi->tx_size = read_tx_size(cm, xd, cm->tx_mode, bsize, 1, r); mbmi->ref_frame[0] = INTRA_FRAME; mbmi->ref_frame[1] = NONE; if (bsize >= BLOCK_8X8) { const MB_PREDICTION_MODE A = above_block_mode(m, above_mi, 0); - const MB_PREDICTION_MODE L = xd->left_available ? - left_block_mode(m, left_mi, 0) : DC_PRED; + const MB_PREDICTION_MODE L = left_block_mode(m, left_mi, 0); mbmi->mode = read_intra_mode(r, vp9_kf_y_mode_prob[A][L]); } else { // Only 4x4, 4x8, 8x4 blocks @@ -180,8 +191,7 @@ static void read_intra_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *m, for (idx = 0; idx < 2; idx += num_4x4_w) { const int ib = idy * 2 + idx; const MB_PREDICTION_MODE A = above_block_mode(m, above_mi, ib); - const MB_PREDICTION_MODE L = (xd->left_available || idx) ? - left_block_mode(m, left_mi, ib) : DC_PRED; + const MB_PREDICTION_MODE L = left_block_mode(m, left_mi, ib); const MB_PREDICTION_MODE b_mode = read_intra_mode(r, vp9_kf_y_mode_prob[A][L]); m->bmi[ib].as_mode = b_mode; @@ -200,7 +210,6 @@ static void read_intra_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *m, static int read_mv_component(vp9_reader *r, const nmv_component *mvcomp, int usehp) { - int mag, d, fr, hp; const int sign = vp9_read(r, mvcomp->sign); const int mv_class = treed_read(r, vp9_mv_class_tree, mvcomp->classes); @@ -251,56 +260,10 @@ static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref, mv->col = ref->col + diff.col; } -static void update_mv(vp9_reader *r, vp9_prob *p) { - if (vp9_read(r, NMV_UPDATE_PROB)) - *p = (vp9_read_literal(r, 7) << 1) | 1; -} - -static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int allow_hp) { - int i, j, k; - - for (j = 0; j < MV_JOINTS - 1; ++j) - update_mv(r, &mvc->joints[j]); - - for (i = 0; i < 2; ++i) { - nmv_component *const comp = &mvc->comps[i]; - - update_mv(r, &comp->sign); - - for (j = 0; j < MV_CLASSES - 1; ++j) - update_mv(r, &comp->classes[j]); - - for (j = 0; j < CLASS0_SIZE - 1; ++j) - update_mv(r, &comp->class0[j]); - - for (j = 0; j < MV_OFFSET_BITS; ++j) - update_mv(r, &comp->bits[j]); - } - - for (i = 0; i < 2; ++i) { - nmv_component *const comp = &mvc->comps[i]; - - for (j = 0; j < CLASS0_SIZE; ++j) - for (k = 0; k < 3; ++k) - update_mv(r, &comp->class0_fp[j][k]); - - for (j = 0; j < 3; ++j) - update_mv(r, &comp->fp[j]); - } - - if (allow_hp) { - for (i = 0; i < 2; ++i) { - update_mv(r, &mvc->comps[i].class0_hp); - update_mv(r, &mvc->comps[i].hp); - } - } -} - // Read the referncence frame -static void read_ref_frames(VP9D_COMP *pbi, vp9_reader *r, +static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, + vp9_reader *r, int segment_id, MV_REFERENCE_FRAME ref_frame[2]) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; FRAME_CONTEXT *const fc = &cm->fc; FRAME_COUNTS *const counts = &cm->counts; @@ -313,7 +276,8 @@ static void read_ref_frames(VP9D_COMP *pbi, vp9_reader *r, if (cm->comp_pred_mode == HYBRID_PREDICTION) { is_comp = vp9_read(r, fc->comp_inter_prob[comp_ctx]); - counts->comp_inter[comp_ctx][is_comp]++; + if (!cm->frame_parallel_decoding_mode) + ++counts->comp_inter[comp_ctx][is_comp]; } else { is_comp = cm->comp_pred_mode == COMP_PREDICTION_ONLY; } @@ -323,18 +287,21 @@ static void read_ref_frames(VP9D_COMP *pbi, vp9_reader *r, const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; const int ref_ctx = vp9_get_pred_context_comp_ref_p(cm, xd); const int b = vp9_read(r, fc->comp_ref_prob[ref_ctx]); - counts->comp_ref[ref_ctx][b]++; + if (!cm->frame_parallel_decoding_mode) + ++counts->comp_ref[ref_ctx][b]; ref_frame[fix_ref_idx] = cm->comp_fixed_ref; ref_frame[!fix_ref_idx] = cm->comp_var_ref[b]; } else { const int ctx0 = vp9_get_pred_context_single_ref_p1(xd); const int bit0 = vp9_read(r, fc->single_ref_prob[ctx0][0]); - ++counts->single_ref[ctx0][0][bit0]; + if (!cm->frame_parallel_decoding_mode) + ++counts->single_ref[ctx0][0][bit0]; if (bit0) { const int ctx1 = vp9_get_pred_context_single_ref_p2(xd); const int bit1 = vp9_read(r, fc->single_ref_prob[ctx1][1]); ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME; - ++counts->single_ref[ctx1][1][bit1]; + if (!cm->frame_parallel_decoding_mode) + ++counts->single_ref[ctx1][1][bit1]; } else { ref_frame[0] = LAST_FRAME; } @@ -344,43 +311,19 @@ static void read_ref_frames(VP9D_COMP *pbi, vp9_reader *r, } } -static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vp9_reader *r) { - int i, j; - for (j = 0; j < SWITCHABLE_FILTERS + 1; ++j) - for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i) - if (vp9_read(r, MODE_UPDATE_PROB)) - vp9_diff_update_prob(r, &fc->switchable_interp_prob[j][i]); -} -static void read_inter_mode_probs(FRAME_CONTEXT *fc, vp9_reader *r) { - int i, j; - for (i = 0; i < INTER_MODE_CONTEXTS; ++i) - for (j = 0; j < INTER_MODES - 1; ++j) - if (vp9_read(r, MODE_UPDATE_PROB)) - vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]); -} - -static INLINE COMPPREDMODE_TYPE read_comp_pred_mode(vp9_reader *r) { - COMPPREDMODE_TYPE mode = vp9_read_bit(r); - if (mode) - mode += vp9_read_bit(r); - return mode; -} - -static INLINE INTERPOLATIONFILTERTYPE read_switchable_filter_type( - VP9D_COMP *pbi, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; +static INLINE INTERPOLATION_TYPE read_switchable_filter_type( + VP9_COMMON *const cm, MACROBLOCKD *const xd, vp9_reader *r) { const int ctx = vp9_get_pred_context_switchable_interp(xd); const int type = treed_read(r, vp9_switchable_interp_tree, cm->fc.switchable_interp_prob[ctx]); - ++cm->counts.switchable_interp[ctx][type]; + if (!cm->frame_parallel_decoding_mode) + ++cm->counts.switchable_interp[ctx][type]; return type; } -static void read_intra_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, - vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; +static void read_intra_block_mode_info(VP9_COMMON *const cm, MODE_INFO *mi, + vp9_reader *r) { MB_MODE_INFO *const mbmi = &mi->mbmi; const BLOCK_SIZE bsize = mi->mbmi.sb_type; @@ -388,9 +331,7 @@ static void read_intra_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, mbmi->ref_frame[1] = NONE; if (bsize >= BLOCK_8X8) { - const int size_group = size_group_lookup[bsize]; - mbmi->mode = read_intra_mode(r, cm->fc.y_mode_prob[size_group]); - cm->counts.y_mode[size_group][mbmi->mode]++; + mbmi->mode = read_intra_mode_y(cm, r, size_group_lookup[bsize]); } else { // Only 4x4, 4x8, 8x4 blocks const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2 @@ -400,10 +341,8 @@ static void read_intra_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, for (idy = 0; idy < 2; idy += num_4x4_h) { for (idx = 0; idx < 2; idx += num_4x4_w) { const int ib = idy * 2 + idx; - const int b_mode = read_intra_mode(r, cm->fc.y_mode_prob[0]); + const int b_mode = read_intra_mode_y(cm, r, 0); mi->bmi[ib].as_mode = b_mode; - cm->counts.y_mode[0][b_mode]++; - if (num_4x4_h == 2) mi->bmi[ib + 2].as_mode = b_mode; if (num_4x4_w == 2) @@ -413,55 +352,98 @@ static void read_intra_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, mbmi->mode = mi->bmi[3].as_mode; } - mbmi->uv_mode = read_intra_mode(r, cm->fc.uv_mode_prob[mbmi->mode]); - cm->counts.uv_mode[mbmi->mode][mbmi->uv_mode]++; + mbmi->uv_mode = read_intra_mode_uv(cm, r, mbmi->mode); } -static int read_is_inter_block(VP9D_COMP *pbi, int segment_id, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; +static INLINE int assign_mv(VP9_COMMON *cm, MB_PREDICTION_MODE mode, + int_mv mv[2], int_mv best_mv[2], + int_mv nearest_mv[2], int_mv near_mv[2], + int is_compound, int allow_hp, vp9_reader *r) { + int i; + int ret = 1; + + switch (mode) { + case NEWMV: { + nmv_context_counts *const mv_counts = cm->frame_parallel_decoding_mode ? + NULL : &cm->counts.mv; + read_mv(r, &mv[0].as_mv, &best_mv[0].as_mv, + &cm->fc.nmvc, mv_counts, allow_hp); + if (is_compound) + read_mv(r, &mv[1].as_mv, &best_mv[1].as_mv, + &cm->fc.nmvc, mv_counts, allow_hp); + for (i = 0; i < 1 + is_compound; ++i) { + ret = ret && mv[i].as_mv.row < MV_UPP && mv[i].as_mv.row > MV_LOW; + ret = ret && mv[i].as_mv.col < MV_UPP && mv[i].as_mv.col > MV_LOW; + } + break; + } + case NEARESTMV: { + mv[0].as_int = nearest_mv[0].as_int; + if (is_compound) mv[1].as_int = nearest_mv[1].as_int; + break; + } + case NEARMV: { + mv[0].as_int = near_mv[0].as_int; + if (is_compound) mv[1].as_int = near_mv[1].as_int; + break; + } + case ZEROMV: { + mv[0].as_int = 0; + if (is_compound) mv[1].as_int = 0; + break; + } + default: { + return 0; + } + } + return ret; +} +static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, + int segment_id, vp9_reader *r) { if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { return vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != INTRA_FRAME; } else { const int ctx = vp9_get_pred_context_intra_inter(xd); const int is_inter = vp9_read(r, vp9_get_pred_prob_intra_inter(cm, xd)); - ++cm->counts.intra_inter[ctx][is_inter]; + if (!cm->frame_parallel_decoding_mode) + ++cm->counts.intra_inter[ctx][is_inter]; return is_inter; } } -static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, +static void read_inter_block_mode_info(VP9_COMMON *const cm, + MACROBLOCKD *const xd, + const TileInfo *const tile, + MODE_INFO *const mi, int mi_row, int mi_col, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - nmv_context *const nmvc = &cm->fc.nmvc; MB_MODE_INFO *const mbmi = &mi->mbmi; - int_mv *const mv0 = &mbmi->mv[0]; - int_mv *const mv1 = &mbmi->mv[1]; const BLOCK_SIZE bsize = mbmi->sb_type; - const int allow_hp = xd->allow_high_precision_mv; + const int allow_hp = cm->allow_high_precision_mv; - int_mv nearest, nearby, best_mv; - int_mv nearest_second, nearby_second, best_mv_second; + int_mv nearest[2], nearmv[2], best[2]; uint8_t inter_mode_ctx; MV_REFERENCE_FRAME ref0; int is_compound; mbmi->uv_mode = DC_PRED; - read_ref_frames(pbi, r, mbmi->segment_id, mbmi->ref_frame); + read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame); ref0 = mbmi->ref_frame[0]; is_compound = has_second_ref(mbmi); - vp9_find_mv_refs(cm, xd, mi, xd->last_mi, ref0, mbmi->ref_mvs[ref0], + vp9_find_mv_refs(cm, xd, tile, mi, xd->last_mi, ref0, mbmi->ref_mvs[ref0], mi_row, mi_col); inter_mode_ctx = mbmi->mode_context[ref0]; if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { mbmi->mode = ZEROMV; - assert(bsize >= BLOCK_8X8); + if (bsize < BLOCK_8X8) { + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Invalid usage of segement feature on small blocks"); + return; + } } else { if (bsize >= BLOCK_8X8) mbmi->mode = read_inter_mode(cm, r, inter_mode_ctx); @@ -469,222 +451,119 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, // nearest, nearby if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) { - vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref0], &nearest, &nearby); - best_mv.as_int = mbmi->ref_mvs[ref0][0].as_int; + vp9_find_best_ref_mvs(xd, allow_hp, + mbmi->ref_mvs[ref0], &nearest[0], &nearmv[0]); + best[0].as_int = nearest[0].as_int; } if (is_compound) { const MV_REFERENCE_FRAME ref1 = mbmi->ref_frame[1]; - vp9_find_mv_refs(cm, xd, mi, xd->last_mi, + vp9_find_mv_refs(cm, xd, tile, mi, xd->last_mi, ref1, mbmi->ref_mvs[ref1], mi_row, mi_col); if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) { - vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref1], - &nearest_second, &nearby_second); - best_mv_second.as_int = mbmi->ref_mvs[ref1][0].as_int; + vp9_find_best_ref_mvs(xd, allow_hp, + mbmi->ref_mvs[ref1], &nearest[1], &nearmv[1]); + best[1].as_int = nearest[1].as_int; } } - mbmi->interp_filter = cm->mcomp_filter_type == SWITCHABLE - ? read_switchable_filter_type(pbi, r) - : cm->mcomp_filter_type; + mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE) + ? read_switchable_filter_type(cm, xd, r) + : cm->mcomp_filter_type; if (bsize < BLOCK_8X8) { const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2 const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2 int idx, idy; + int b_mode; for (idy = 0; idy < 2; idy += num_4x4_h) { for (idx = 0; idx < 2; idx += num_4x4_w) { - int_mv blockmv, secondmv; + int_mv block[2]; const int j = idy * 2 + idx; - const int b_mode = read_inter_mode(cm, r, inter_mode_ctx); + b_mode = read_inter_mode(cm, r, inter_mode_ctx); if (b_mode == NEARESTMV || b_mode == NEARMV) { - vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest, &nearby, j, 0, + vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, &nearest[0], + &nearmv[0], j, 0, mi_row, mi_col); if (is_compound) - vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest_second, - &nearby_second, j, 1, - mi_row, mi_col); + vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, &nearest[1], + &nearmv[1], j, 1, + mi_row, mi_col); } - switch (b_mode) { - case NEWMV: - read_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc, - &cm->counts.mv, allow_hp); - - if (is_compound) - read_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc, - &cm->counts.mv, allow_hp); - break; - case NEARESTMV: - blockmv.as_int = nearest.as_int; - if (is_compound) - secondmv.as_int = nearest_second.as_int; - break; - case NEARMV: - blockmv.as_int = nearby.as_int; - if (is_compound) - secondmv.as_int = nearby_second.as_int; - break; - case ZEROMV: - blockmv.as_int = 0; - if (is_compound) - secondmv.as_int = 0; - break; - default: - assert(!"Invalid inter mode value"); - } - mi->bmi[j].as_mv[0].as_int = blockmv.as_int; + if (!assign_mv(cm, b_mode, block, best, nearest, nearmv, + is_compound, allow_hp, r)) { + xd->corrupted |= 1; + break; + }; + + + mi->bmi[j].as_mv[0].as_int = block[0].as_int; if (is_compound) - mi->bmi[j].as_mv[1].as_int = secondmv.as_int; + mi->bmi[j].as_mv[1].as_int = block[1].as_int; if (num_4x4_h == 2) mi->bmi[j + 2] = mi->bmi[j]; if (num_4x4_w == 2) mi->bmi[j + 1] = mi->bmi[j]; - mi->mbmi.mode = b_mode; } } - mv0->as_int = mi->bmi[3].as_mv[0].as_int; - mv1->as_int = mi->bmi[3].as_mv[1].as_int; - } else { - switch (mbmi->mode) { - case NEARMV: - mv0->as_int = nearby.as_int; - if (is_compound) - mv1->as_int = nearby_second.as_int; - break; + mi->mbmi.mode = b_mode; - case NEARESTMV: - mv0->as_int = nearest.as_int; - if (is_compound) - mv1->as_int = nearest_second.as_int; - break; - - case ZEROMV: - mv0->as_int = 0; - if (is_compound) - mv1->as_int = 0; - break; - - case NEWMV: - read_mv(r, &mv0->as_mv, &best_mv.as_mv, nmvc, &cm->counts.mv, allow_hp); - if (is_compound) - read_mv(r, &mv1->as_mv, &best_mv_second.as_mv, nmvc, &cm->counts.mv, - allow_hp); - break; - default: - assert(!"Invalid inter mode value"); - } + mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; + mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; + } else { + xd->corrupted |= !assign_mv(cm, mbmi->mode, mbmi->mv, + best, nearest, nearmv, + is_compound, allow_hp, r); } } -static void read_inter_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, +static void read_inter_frame_mode_info(VP9_COMMON *const cm, + MACROBLOCKD *const xd, + const TileInfo *const tile, + MODE_INFO *const mi, int mi_row, int mi_col, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; MB_MODE_INFO *const mbmi = &mi->mbmi; int inter_block; mbmi->mv[0].as_int = 0; mbmi->mv[1].as_int = 0; - mbmi->segment_id = read_inter_segment_id(pbi, mi_row, mi_col, r); - mbmi->skip_coeff = read_skip_coeff(pbi, mbmi->segment_id, r); - inter_block = read_is_inter_block(pbi, mbmi->segment_id, r); - mbmi->tx_size = read_tx_size(pbi, cm->tx_mode, mbmi->sb_type, + mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r); + mbmi->skip_coeff = read_skip_coeff(cm, xd, mbmi->segment_id, r); + inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r); + mbmi->tx_size = read_tx_size(cm, xd, cm->tx_mode, mbmi->sb_type, !mbmi->skip_coeff || !inter_block, r); if (inter_block) - read_inter_block_mode_info(pbi, mi, mi_row, mi_col, r); + read_inter_block_mode_info(cm, xd, tile, mi, mi_row, mi_col, r); else - read_intra_block_mode_info(pbi, mi, r); + read_intra_block_mode_info(cm, mi, r); } -static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) { - int i; - - cm->comp_pred_mode = cm->allow_comp_inter_inter ? read_comp_pred_mode(r) - : SINGLE_PREDICTION_ONLY; - - if (cm->comp_pred_mode == HYBRID_PREDICTION) - for (i = 0; i < COMP_INTER_CONTEXTS; i++) - if (vp9_read(r, MODE_UPDATE_PROB)) - vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]); - - if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) - for (i = 0; i < REF_CONTEXTS; i++) { - if (vp9_read(r, MODE_UPDATE_PROB)) - vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][0]); - if (vp9_read(r, MODE_UPDATE_PROB)) - vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][1]); - } - - if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) - for (i = 0; i < REF_CONTEXTS; i++) - if (vp9_read(r, MODE_UPDATE_PROB)) - vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]); -} - -void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - int k; - - // TODO(jkoleszar): does this clear more than MBSKIP_CONTEXTS? Maybe remove. - // vpx_memset(cm->fc.mbskip_probs, 0, sizeof(cm->fc.mbskip_probs)); - for (k = 0; k < MBSKIP_CONTEXTS; ++k) - if (vp9_read(r, MODE_UPDATE_PROB)) - vp9_diff_update_prob(r, &cm->fc.mbskip_probs[k]); - - if (cm->frame_type != KEY_FRAME && !cm->intra_only) { - nmv_context *const nmvc = &pbi->common.fc.nmvc; - MACROBLOCKD *const xd = &pbi->mb; - int i, j; - - read_inter_mode_probs(&cm->fc, r); - - if (cm->mcomp_filter_type == SWITCHABLE) - read_switchable_interp_probs(&cm->fc, r); - - for (i = 0; i < INTRA_INTER_CONTEXTS; i++) - if (vp9_read(r, MODE_UPDATE_PROB)) - vp9_diff_update_prob(r, &cm->fc.intra_inter_prob[i]); - - read_comp_pred(cm, r); - - for (j = 0; j < BLOCK_SIZE_GROUPS; j++) - for (i = 0; i < INTRA_MODES - 1; ++i) - if (vp9_read(r, MODE_UPDATE_PROB)) - vp9_diff_update_prob(r, &cm->fc.y_mode_prob[j][i]); - - for (j = 0; j < NUM_PARTITION_CONTEXTS; ++j) - for (i = 0; i < PARTITION_TYPES - 1; ++i) - if (vp9_read(r, MODE_UPDATE_PROB)) - vp9_diff_update_prob(r, &cm->fc.partition_prob[INTER_FRAME][j][i]); - - read_mv_probs(r, nmvc, xd->allow_high_precision_mv); - } -} - -void vp9_read_mode_info(VP9D_COMP* pbi, int mi_row, int mi_col, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - MODE_INFO *mi = xd->this_mi; +void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd, + const TileInfo *const tile, + int mi_row, int mi_col, vp9_reader *r) { + MODE_INFO *const mi = xd->mi_8x8[0]; const BLOCK_SIZE bsize = mi->mbmi.sb_type; - const int bw = 1 << mi_width_log2(bsize); - const int bh = 1 << mi_height_log2(bsize); + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; const int y_mis = MIN(bh, cm->mi_rows - mi_row); const int x_mis = MIN(bw, cm->mi_cols - mi_col); int x, y, z; - if (cm->frame_type == KEY_FRAME || cm->intra_only) - read_intra_frame_mode_info(pbi, mi, mi_row, mi_col, r); + if (frame_is_intra_only(cm)) + read_intra_frame_mode_info(cm, xd, mi, mi_row, mi_col, r); else - read_inter_frame_mode_info(pbi, mi, mi_row, mi_col, r); + read_inter_frame_mode_info(cm, xd, tile, mi, mi_row, mi_col, r); - for (y = 0, z = 0; y < y_mis; y++, z += cm->mode_info_stride) + for (y = 0, z = 0; y < y_mis; y++, z += cm->mode_info_stride) { for (x = !y; x < x_mis; x++) { - xd->mi_8x8[z + x] = mi; - } + xd->mi_8x8[z + x] = mi; + } + } } diff --git a/libvpx/vp9/decoder/vp9_decodemv.h b/libvpx/vp9/decoder/vp9_decodemv.h index 462d2e3..8e9ae4a 100644 --- a/libvpx/vp9/decoder/vp9_decodemv.h +++ b/libvpx/vp9/decoder/vp9_decodemv.h @@ -14,8 +14,10 @@ #include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/decoder/vp9_dboolhuff.h" -void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r); +struct TileInfo; -void vp9_read_mode_info(VP9D_COMP* pbi, int mi_row, int mi_col, vp9_reader *r); +void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd, + const struct TileInfo *const tile, + int mi_row, int mi_col, vp9_reader *r); #endif // VP9_DECODER_VP9_DECODEMV_H_ diff --git a/libvpx/vp9/decoder/vp9_decodframe.c b/libvpx/vp9/decoder/vp9_decodframe.c index dbba28e..4746a3a 100644 --- a/libvpx/vp9/decoder/vp9_decodframe.c +++ b/libvpx/vp9/decoder/vp9_decodframe.c @@ -19,6 +19,7 @@ #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_extend.h" +#include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_reconintra.h" @@ -31,16 +32,49 @@ #include "vp9/decoder/vp9_detokenize.h" #include "vp9/decoder/vp9_decodemv.h" #include "vp9/decoder/vp9_dsubexp.h" -#include "vp9/decoder/vp9_idct_blk.h" #include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/decoder/vp9_read_bit_buffer.h" #include "vp9/decoder/vp9_thread.h" #include "vp9/decoder/vp9_treereader.h" +typedef struct TileWorkerData { + VP9_COMMON *cm; + vp9_reader bit_reader; + DECLARE_ALIGNED(16, MACROBLOCKD, xd); + DECLARE_ALIGNED(16, unsigned char, token_cache[1024]); +} TileWorkerData; + static int read_be32(const uint8_t *p) { return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]; } +static int is_compound_prediction_allowed(const VP9_COMMON *cm) { + int i; + for (i = 1; i < ALLOWED_REFS_PER_FRAME; ++i) + if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]) + return 1; + + return 0; +} + +static void setup_compound_prediction(VP9_COMMON *cm) { + if (cm->ref_frame_sign_bias[LAST_FRAME] == + cm->ref_frame_sign_bias[GOLDEN_FRAME]) { + cm->comp_fixed_ref = ALTREF_FRAME; + cm->comp_var_ref[0] = LAST_FRAME; + cm->comp_var_ref[1] = GOLDEN_FRAME; + } else if (cm->ref_frame_sign_bias[LAST_FRAME] == + cm->ref_frame_sign_bias[ALTREF_FRAME]) { + cm->comp_fixed_ref = GOLDEN_FRAME; + cm->comp_var_ref[0] = LAST_FRAME; + cm->comp_var_ref[1] = ALTREF_FRAME; + } else { + cm->comp_fixed_ref = LAST_FRAME; + cm->comp_var_ref[0] = GOLDEN_FRAME; + cm->comp_var_ref[1] = ALTREF_FRAME; + } +} + // len == 0 is not allowed static int read_is_valid(const uint8_t *start, size_t len, const uint8_t *end) { return start + len > start && start + len <= end; @@ -63,18 +97,105 @@ static void read_tx_probs(struct tx_probs *tx_probs, vp9_reader *r) { for (i = 0; i < TX_SIZE_CONTEXTS; ++i) for (j = 0; j < TX_SIZES - 3; ++j) - if (vp9_read(r, MODE_UPDATE_PROB)) - vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]); + vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]); for (i = 0; i < TX_SIZE_CONTEXTS; ++i) for (j = 0; j < TX_SIZES - 2; ++j) - if (vp9_read(r, MODE_UPDATE_PROB)) - vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]); + vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]); for (i = 0; i < TX_SIZE_CONTEXTS; ++i) for (j = 0; j < TX_SIZES - 1; ++j) - if (vp9_read(r, MODE_UPDATE_PROB)) - vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]); + vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]); +} + +static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vp9_reader *r) { + int i, j; + for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) + for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i) + vp9_diff_update_prob(r, &fc->switchable_interp_prob[j][i]); +} + +static void read_inter_mode_probs(FRAME_CONTEXT *fc, vp9_reader *r) { + int i, j; + for (i = 0; i < INTER_MODE_CONTEXTS; ++i) + for (j = 0; j < INTER_MODES - 1; ++j) + vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]); +} + +static INLINE COMPPREDMODE_TYPE read_comp_pred_mode(vp9_reader *r) { + COMPPREDMODE_TYPE mode = vp9_read_bit(r); + if (mode) + mode += vp9_read_bit(r); + return mode; +} + +static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) { + int i; + + const int compound_allowed = is_compound_prediction_allowed(cm); + cm->comp_pred_mode = compound_allowed ? read_comp_pred_mode(r) + : SINGLE_PREDICTION_ONLY; + if (compound_allowed) + setup_compound_prediction(cm); + + if (cm->comp_pred_mode == HYBRID_PREDICTION) + for (i = 0; i < COMP_INTER_CONTEXTS; i++) + vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]); + + if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) + for (i = 0; i < REF_CONTEXTS; i++) { + vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][0]); + vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][1]); + } + + if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) + for (i = 0; i < REF_CONTEXTS; i++) + vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]); +} + +static void update_mv(vp9_reader *r, vp9_prob *p) { + if (vp9_read(r, NMV_UPDATE_PROB)) + *p = (vp9_read_literal(r, 7) << 1) | 1; +} + +static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int allow_hp) { + int i, j, k; + + for (j = 0; j < MV_JOINTS - 1; ++j) + update_mv(r, &mvc->joints[j]); + + for (i = 0; i < 2; ++i) { + nmv_component *const comp = &mvc->comps[i]; + + update_mv(r, &comp->sign); + + for (j = 0; j < MV_CLASSES - 1; ++j) + update_mv(r, &comp->classes[j]); + + for (j = 0; j < CLASS0_SIZE - 1; ++j) + update_mv(r, &comp->class0[j]); + + for (j = 0; j < MV_OFFSET_BITS; ++j) + update_mv(r, &comp->bits[j]); + } + + for (i = 0; i < 2; ++i) { + nmv_component *const comp = &mvc->comps[i]; + + for (j = 0; j < CLASS0_SIZE; ++j) + for (k = 0; k < 3; ++k) + update_mv(r, &comp->class0_fp[j][k]); + + for (j = 0; j < 3; ++j) + update_mv(r, &comp->fp[j]); + } + + if (allow_hp) { + for (i = 0; i < 2; ++i) { + update_mv(r, &mvc->comps[i].class0_hp); + update_mv(r, &mvc->comps[i].hp); + } + } } static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) { @@ -85,47 +206,110 @@ static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) { xd->plane[i].dequant = cm->uv_dequant[q_index]; } -static void decode_block(int plane, int block, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg) { - MACROBLOCKD* const xd = arg; +// Allocate storage for each tile column. +// TODO(jzern): when max_threads <= 1 the same storage could be used for each +// tile. +static void alloc_tile_storage(VP9D_COMP *pbi, int tile_cols) { + VP9_COMMON *const cm = &pbi->common; + const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); + int i, tile_col; + + CHECK_MEM_ERROR(cm, pbi->mi_streams, + vpx_realloc(pbi->mi_streams, tile_cols * + sizeof(*pbi->mi_streams))); + for (tile_col = 0; tile_col < tile_cols; ++tile_col) { + TileInfo tile; + + vp9_tile_init(&tile, cm, 0, tile_col); + pbi->mi_streams[tile_col] = + &cm->mi[cm->mi_rows * tile.mi_col_start]; + } + + // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm + // block where mi unit size is 8x8. + CHECK_MEM_ERROR(cm, pbi->above_context[0], + vpx_realloc(pbi->above_context[0], + sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * + 2 * aligned_mi_cols)); + for (i = 1; i < MAX_MB_PLANE; ++i) { + pbi->above_context[i] = pbi->above_context[0] + + i * sizeof(*pbi->above_context[0]) * + 2 * aligned_mi_cols; + } + + // This is sized based on the entire frame. Each tile operates within its + // column bounds. + CHECK_MEM_ERROR(cm, pbi->above_seg_context, + vpx_realloc(pbi->above_seg_context, + sizeof(*pbi->above_seg_context) * + aligned_mi_cols)); +} + +static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { struct macroblockd_plane *const pd = &xd->plane[plane]; - int16_t* const qcoeff = BLOCK_OFFSET(pd->qcoeff, block); + int16_t* const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); const int stride = pd->dst.stride; const int eob = pd->eobs[block]; - const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size, - block); - uint8_t* const dst = raster_block_offset_uint8(plane_bsize, raster_block, - pd->dst.buf, stride); - switch (tx_size) { - case TX_4X4: { - const TX_TYPE tx_type = get_tx_type_4x4(pd->plane_type, xd, raster_block); - if (tx_type == DCT_DCT) - xd->itxm_add(qcoeff, dst, stride, eob); + if (eob > 0) { + TX_TYPE tx_type; + const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size, + block); + uint8_t* const dst = raster_block_offset_uint8(plane_bsize, raster_block, + pd->dst.buf, stride); + switch (tx_size) { + case TX_4X4: + tx_type = get_tx_type_4x4(pd->plane_type, xd, raster_block); + if (tx_type == DCT_DCT) + xd->itxm_add(dqcoeff, dst, stride, eob); + else + vp9_iht4x4_16_add(dqcoeff, dst, stride, tx_type); + break; + case TX_8X8: + tx_type = get_tx_type_8x8(pd->plane_type, xd); + vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_16X16: + tx_type = get_tx_type_16x16(pd->plane_type, xd); + vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_32X32: + tx_type = DCT_DCT; + vp9_idct32x32_add(dqcoeff, dst, stride, eob); + break; + default: + assert(!"Invalid transform size"); + } + + if (eob == 1) { + vpx_memset(dqcoeff, 0, 2 * sizeof(dqcoeff[0])); + } else { + if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) + vpx_memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); + else if (tx_size == TX_32X32 && eob <= 34) + vpx_memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); else - vp9_iht_add_c(tx_type, qcoeff, dst, stride, eob); - break; + vpx_memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); } - case TX_8X8: - vp9_iht_add_8x8_c(get_tx_type_8x8(pd->plane_type, xd), qcoeff, dst, - stride, eob); - break; - case TX_16X16: - vp9_iht_add_16x16_c(get_tx_type_16x16(pd->plane_type, xd), qcoeff, dst, - stride, eob); - break; - case TX_32X32: - vp9_idct_add_32x32(qcoeff, dst, stride, eob); - break; - default: - assert(!"Invalid transform size"); } } -static void decode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg) { - MACROBLOCKD* const xd = arg; +struct intra_args { + VP9_COMMON *cm; + MACROBLOCKD *xd; + vp9_reader *r; + unsigned char* token_cache; +}; + +static void predict_and_reconstruct_intra_block(int plane, int block, + BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, void *arg) { + struct intra_args *const args = arg; + VP9_COMMON *const cm = args->cm; + MACROBLOCKD *const xd = args->xd; + struct macroblockd_plane *const pd = &xd->plane[plane]; - MODE_INFO *const mi = xd->this_mi; + MODE_INFO *const mi = xd->mi_8x8[0]; const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size, block); uint8_t* const dst = raster_block_offset_uint8(plane_bsize, raster_block, @@ -142,32 +326,37 @@ static void decode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, b_width_log2(plane_bsize), tx_size, mode, dst, pd->dst.stride, dst, pd->dst.stride); - if (!mi->mbmi.skip_coeff) - decode_block(plane, block, plane_bsize, tx_size, arg); + if (!mi->mbmi.skip_coeff) { + vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, tx_size, + args->r, args->token_cache); + inverse_transform_block(xd, plane, block, plane_bsize, tx_size); + } } -static int decode_tokens(VP9D_COMP *pbi, BLOCK_SIZE bsize, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi; - - if (mbmi->skip_coeff) { - reset_skip_context(xd, bsize); - return -1; - } else { - if (cm->seg.enabled) - setup_plane_dequants(cm, xd, vp9_get_qindex(&cm->seg, mbmi->segment_id, - cm->base_qindex)); - - // TODO(dkovalev) if (!vp9_reader_has_error(r)) - return vp9_decode_tokens(pbi, r, bsize); - } +struct inter_args { + VP9_COMMON *cm; + MACROBLOCKD *xd; + vp9_reader *r; + int *eobtotal; + unsigned char* token_cache; +}; + +static void reconstruct_inter_block(int plane, int block, + BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, void *arg) { + struct inter_args *args = arg; + VP9_COMMON *const cm = args->cm; + MACROBLOCKD *const xd = args->xd; + + *args->eobtotal += vp9_decode_block_tokens(cm, xd, plane, block, + plane_bsize, tx_size, + args->r, args->token_cache); + inverse_transform_block(xd, plane, block, plane_bsize, tx_size); } -static void set_offsets(VP9D_COMP *pbi, BLOCK_SIZE bsize, - int mi_row, int mi_col) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; +static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + BLOCK_SIZE bsize, int mi_row, int mi_col) { const int bh = num_8x8_blocks_high_lookup[bsize]; const int bw = num_8x8_blocks_wide_lookup[bsize]; const int offset = mi_row * cm->mode_info_stride + mi_col; @@ -178,178 +367,187 @@ static void set_offsets(VP9D_COMP *pbi, BLOCK_SIZE bsize, xd->prev_mi_8x8 = cm->prev_mi_grid_visible + offset; // we are using the mode info context stream here - xd->this_mi = - xd->mi_8x8[0] = xd->mic_stream_ptr; - xd->this_mi->mbmi.sb_type = bsize; - xd->mic_stream_ptr++; + xd->mi_8x8[0] = xd->mi_stream; + xd->mi_8x8[0]->mbmi.sb_type = bsize; + ++xd->mi_stream; // Special case: if prev_mi is NULL, the previous mode info context // cannot be used. xd->last_mi = cm->prev_mi ? xd->prev_mi_8x8[0] : NULL; - set_skip_context(cm, xd, mi_row, mi_col); - set_partition_seg_context(cm, xd, mi_row, mi_col); + set_skip_context(xd, xd->above_context, xd->left_context, mi_row, mi_col); // Distance of Mb to the various image edges. These are specified to 8th pel // as they are always compared to values that are in 1/8th pel units - set_mi_row_col(cm, xd, mi_row, bh, mi_col, bw); + set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); - setup_dst_planes(xd, &cm->yv12_fb[cm->new_fb_idx], mi_row, mi_col); + setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col); } -static void set_ref(VP9D_COMP *pbi, int i, int mi_row, int mi_col) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi; - const int ref = mbmi->ref_frame[i] - LAST_FRAME; - const YV12_BUFFER_CONFIG *cfg = &cm->yv12_fb[cm->active_ref_idx[ref]]; - const struct scale_factors *sf = &cm->active_ref_scale[ref]; - if (!vp9_is_valid_scale(sf)) +static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd, + int idx, int mi_row, int mi_col) { + MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + const int ref = mbmi->ref_frame[idx] - LAST_FRAME; + const YV12_BUFFER_CONFIG *cfg = get_frame_ref_buffer(cm, ref); + const struct scale_factors_common *sfc = &cm->active_ref_scale_comm[ref]; + if (!vp9_is_valid_scale(sfc)) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid scale factors"); - xd->scale_factor[i] = *sf; - setup_pre_planes(xd, i, cfg, mi_row, mi_col, sf); + xd->scale_factor[idx].sfc = sfc; + setup_pre_planes(xd, idx, cfg, mi_row, mi_col, &xd->scale_factor[idx]); xd->corrupted |= cfg->corrupted; } -static void decode_modes_b(VP9D_COMP *pbi, int mi_row, int mi_col, - vp9_reader *r, BLOCK_SIZE bsize) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; +static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + int mi_row, int mi_col, + vp9_reader *r, BLOCK_SIZE bsize, + unsigned char *token_cache) { const int less8x8 = bsize < BLOCK_8X8; MB_MODE_INFO *mbmi; - if (less8x8) - if (xd->ab_index > 0) - return; - - set_offsets(pbi, bsize, mi_row, mi_col); - vp9_read_mode_info(pbi, mi_row, mi_col, r); + set_offsets(cm, xd, tile, bsize, mi_row, mi_col); + vp9_read_mode_info(cm, xd, tile, mi_row, mi_col, r); if (less8x8) bsize = BLOCK_8X8; // Has to be called after set_offsets - mbmi = &xd->this_mi->mbmi; + mbmi = &xd->mi_8x8[0]->mbmi; - if (!is_inter_block(mbmi)) { - // Intra reconstruction - decode_tokens(pbi, bsize, r); - foreach_transformed_block(xd, bsize, decode_block_intra, xd); + if (mbmi->skip_coeff) { + reset_skip_context(xd, bsize); } else { - // Inter reconstruction - int eobtotal; + if (cm->seg.enabled) + setup_plane_dequants(cm, xd, vp9_get_qindex(&cm->seg, mbmi->segment_id, + cm->base_qindex)); + } - set_ref(pbi, 0, mi_row, mi_col); + if (!is_inter_block(mbmi)) { + struct intra_args arg = { cm, xd, r, token_cache }; + foreach_transformed_block(xd, bsize, predict_and_reconstruct_intra_block, + &arg); + } else { + // Setup + set_ref(cm, xd, 0, mi_row, mi_col); if (has_second_ref(mbmi)) - set_ref(pbi, 1, mi_row, mi_col); + set_ref(cm, xd, 1, mi_row, mi_col); - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); + xd->subpix.filter_x = xd->subpix.filter_y = + vp9_get_filter_kernel(mbmi->interp_filter); + + // Prediction vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); - eobtotal = decode_tokens(pbi, bsize, r); - if (less8x8) { - if (eobtotal >= 0) - foreach_transformed_block(xd, bsize, decode_block, xd); - } else { - assert(mbmi->sb_type == bsize); - if (eobtotal == 0) - // skip loopfilter - vp9_set_pred_flag_mbskip(xd, bsize, 1); - else if (eobtotal > 0) - foreach_transformed_block(xd, bsize, decode_block, xd); + + // Reconstruction + if (!mbmi->skip_coeff) { + int eobtotal = 0; + struct inter_args arg = { cm, xd, r, &eobtotal, token_cache }; + foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg); + if (!less8x8 && eobtotal == 0) + mbmi->skip_coeff = 1; // skip loopfilter } } + xd->corrupted |= vp9_reader_has_error(r); } -static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col, - vp9_reader* r, BLOCK_SIZE bsize) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; +static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs, + int mi_row, int mi_col, BLOCK_SIZE bsize, + vp9_reader *r) { + const int ctx = partition_plane_context(xd->above_seg_context, + xd->left_seg_context, + mi_row, mi_col, bsize); + const vp9_prob *const probs = get_partition_probs(cm, ctx); + const int has_rows = (mi_row + hbs) < cm->mi_rows; + const int has_cols = (mi_col + hbs) < cm->mi_cols; + PARTITION_TYPE p; + + if (has_rows && has_cols) + p = treed_read(r, vp9_partition_tree, probs); + else if (!has_rows && has_cols) + p = vp9_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ; + else if (has_rows && !has_cols) + p = vp9_read(r, probs[2]) ? PARTITION_SPLIT : PARTITION_VERT; + else + p = PARTITION_SPLIT; + + if (!cm->frame_parallel_decoding_mode) + ++cm->counts.partition[ctx][p]; + + return p; +} + +static void decode_modes_sb(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + int mi_row, int mi_col, + vp9_reader* r, BLOCK_SIZE bsize, + unsigned char *token_cache) { const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2; - PARTITION_TYPE partition = PARTITION_NONE; + PARTITION_TYPE partition; BLOCK_SIZE subsize; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - if (bsize < BLOCK_8X8) { - if (xd->ab_index != 0) - return; - } else { - int pl; - const int idx = check_bsize_coverage(hbs, cm->mi_rows, cm->mi_cols, - mi_row, mi_col); - set_partition_seg_context(cm, xd, mi_row, mi_col); - pl = partition_plane_context(xd, bsize); - - if (idx == 0) - partition = treed_read(r, vp9_partition_tree, - cm->fc.partition_prob[cm->frame_type][pl]); - else if (idx > 0 && - !vp9_read(r, cm->fc.partition_prob[cm->frame_type][pl][idx])) - partition = (idx == 1) ? PARTITION_HORZ : PARTITION_VERT; - else - partition = PARTITION_SPLIT; - - cm->counts.partition[pl][partition]++; - } - + partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r); subsize = get_subsize(bsize, partition); - *get_sb_index(xd, subsize) = 0; - - switch (partition) { - case PARTITION_NONE: - decode_modes_b(pbi, mi_row, mi_col, r, subsize); - break; - case PARTITION_HORZ: - decode_modes_b(pbi, mi_row, mi_col, r, subsize); - *get_sb_index(xd, subsize) = 1; - if (mi_row + hbs < cm->mi_rows) - decode_modes_b(pbi, mi_row + hbs, mi_col, r, subsize); - break; - case PARTITION_VERT: - decode_modes_b(pbi, mi_row, mi_col, r, subsize); - *get_sb_index(xd, subsize) = 1; - if (mi_col + hbs < cm->mi_cols) - decode_modes_b(pbi, mi_row, mi_col + hbs, r, subsize); - break; - case PARTITION_SPLIT: { - int n; - for (n = 0; n < 4; n++) { - const int j = n >> 1, i = n & 1; - *get_sb_index(xd, subsize) = n; - decode_modes_sb(pbi, mi_row + j * hbs, mi_col + i * hbs, r, subsize); - } - } break; - default: - assert(!"Invalid partition type"); + if (subsize < BLOCK_8X8) { + decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache); + } else { + switch (partition) { + case PARTITION_NONE: + decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache); + break; + case PARTITION_HORZ: + decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache); + if (mi_row + hbs < cm->mi_rows) + decode_modes_b(cm, xd, tile, mi_row + hbs, mi_col, r, subsize, + token_cache); + break; + case PARTITION_VERT: + decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache); + if (mi_col + hbs < cm->mi_cols) + decode_modes_b(cm, xd, tile, mi_row, mi_col + hbs, r, subsize, + token_cache); + break; + case PARTITION_SPLIT: + decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, subsize, + token_cache); + decode_modes_sb(cm, xd, tile, mi_row, mi_col + hbs, r, subsize, + token_cache); + decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col, r, subsize, + token_cache); + decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize, + token_cache); + break; + default: + assert(!"Invalid partition type"); + } } // update partition context if (bsize >= BLOCK_8X8 && - (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) { - set_partition_seg_context(cm, xd, mi_row, mi_col); - update_partition_context(xd, subsize, bsize); - } + (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) + update_partition_context(xd->above_seg_context, xd->left_seg_context, + mi_row, mi_col, subsize, bsize); } -static void setup_token_decoder(VP9D_COMP *pbi, - const uint8_t *data, size_t read_size, +static void setup_token_decoder(const uint8_t *data, + const uint8_t *data_end, + size_t read_size, + struct vpx_internal_error_info *error_info, vp9_reader *r) { - VP9_COMMON *cm = &pbi->common; - const uint8_t *data_end = pbi->source + pbi->source_sz; - // Validate the calculated partition length. If the buffer // described by the partition can't be fully read, then restrict // it to the portion that can be (for EC mode) or throw an error. if (!read_is_valid(data, read_size, data_end)) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt tile length"); if (vp9_reader_init(r, data, read_size)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + vpx_internal_error(error_info, VPX_CODEC_MEM_ERROR, "Failed to allocate bool decoder %d", 1); } @@ -364,22 +562,15 @@ static void read_coef_probs_common(vp9_coeff_probs_model *coef_probs, for (l = 0; l < PREV_COEF_CONTEXTS; l++) if (k > 0 || l < 3) for (m = 0; m < UNCONSTRAINED_NODES; m++) - if (vp9_read(r, VP9_COEF_UPDATE_PROB)) - vp9_diff_update_prob(r, &coef_probs[i][j][k][l][m]); + vp9_diff_update_prob(r, &coef_probs[i][j][k][l][m]); } static void read_coef_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode, vp9_reader *r) { - read_coef_probs_common(fc->coef_probs[TX_4X4], r); - - if (tx_mode > ONLY_4X4) - read_coef_probs_common(fc->coef_probs[TX_8X8], r); - - if (tx_mode > ALLOW_8X8) - read_coef_probs_common(fc->coef_probs[TX_16X16], r); - - if (tx_mode > ALLOW_16X16) - read_coef_probs_common(fc->coef_probs[TX_32X32], r); + const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; + TX_SIZE tx_size; + for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) + read_coef_probs_common(fc->coef_probs[tx_size], r); } static void setup_segmentation(struct segmentation *seg, @@ -436,7 +627,6 @@ static void setup_segmentation(struct segmentation *seg, static void setup_loopfilter(struct loopfilter *lf, struct vp9_read_bit_buffer *rb) { - lf->filter_level = vp9_rb_read_literal(rb, 6); lf->sharpness_level = vp9_rb_read_literal(rb, 3); @@ -467,9 +657,8 @@ static int read_delta_q(struct vp9_read_bit_buffer *rb, int *delta_q) { return old != *delta_q; } -static void setup_quantization(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) { - MACROBLOCKD *const xd = &pbi->mb; - VP9_COMMON *const cm = &pbi->common; +static void setup_quantization(VP9_COMMON *const cm, MACROBLOCKD *const xd, + struct vp9_read_bit_buffer *rb) { int update = 0; cm->base_qindex = vp9_rb_read_literal(rb, QINDEX_BITS); @@ -484,16 +673,15 @@ static void setup_quantization(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) { cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; - xd->itxm_add = xd->lossless ? vp9_idct_add_lossless_c - : vp9_idct_add; + xd->itxm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add; } -static INTERPOLATIONFILTERTYPE read_interp_filter_type( - struct vp9_read_bit_buffer *rb) { - const INTERPOLATIONFILTERTYPE literal_to_type[] = { EIGHTTAP_SMOOTH, - EIGHTTAP, - EIGHTTAP_SHARP, - BILINEAR }; +static INTERPOLATION_TYPE read_interp_filter_type( + struct vp9_read_bit_buffer *rb) { + const INTERPOLATION_TYPE literal_to_type[] = { EIGHTTAP_SMOOTH, + EIGHTTAP, + EIGHTTAP_SHARP, + BILINEAR }; return vp9_rb_read_bit(rb) ? SWITCHABLE : literal_to_type[vp9_rb_read_literal(rb, 2)]; } @@ -539,7 +727,7 @@ static void apply_frame_size(VP9D_COMP *pbi, int width, int height) { vp9_update_frame_size(cm); } - vp9_realloc_frame_buffer(&cm->yv12_fb[cm->new_fb_idx], cm->width, cm->height, + vp9_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, VP9BORDERINPIXELS); } @@ -560,7 +748,7 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi, int found = 0, i; for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) { if (vp9_rb_read_bit(rb)) { - YV12_BUFFER_CONFIG *cfg = &cm->yv12_fb[cm->active_ref_idx[i]]; + YV12_BUFFER_CONFIG *const cfg = get_frame_ref_buffer(cm, i); width = cfg->y_crop_width; height = cfg->y_crop_height; found = 1; @@ -579,67 +767,73 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi, setup_display_size(cm, rb); } -static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) { +static void setup_tile_context(VP9D_COMP *const pbi, MACROBLOCKD *const xd, + int tile_col) { + int i; + xd->mi_stream = pbi->mi_streams[tile_col]; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + xd->above_context[i] = pbi->above_context[i]; + } + // see note in alloc_tile_storage(). + xd->above_seg_context = pbi->above_seg_context; +} + +static void decode_tile(VP9D_COMP *pbi, const TileInfo *const tile, + vp9_reader *r) { const int num_threads = pbi->oxcf.max_threads; VP9_COMMON *const cm = &pbi->common; int mi_row, mi_col; - YV12_BUFFER_CONFIG *const fb = &cm->yv12_fb[cm->new_fb_idx]; + MACROBLOCKD *xd = &pbi->mb; if (pbi->do_loopfilter_inline) { - if (num_threads > 1) { - LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; - lf_data->frame_buffer = fb; - lf_data->cm = cm; - lf_data->xd = pbi->mb; - lf_data->stop = 0; - lf_data->y_only = 0; - } + LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; + lf_data->frame_buffer = get_frame_new_buffer(cm); + lf_data->cm = cm; + lf_data->xd = pbi->mb; + lf_data->stop = 0; + lf_data->y_only = 0; vp9_loop_filter_frame_init(cm, cm->lf.filter_level); } - for (mi_row = cm->cur_tile_mi_row_start; mi_row < cm->cur_tile_mi_row_end; + for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; mi_row += MI_BLOCK_SIZE) { // For a SB there are 2 left contexts, each pertaining to a MB row within - vp9_zero(cm->left_context); - vp9_zero(cm->left_seg_context); - for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end; + vp9_zero(xd->left_context); + vp9_zero(xd->left_seg_context); + for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) - decode_modes_sb(pbi, mi_row, mi_col, r, BLOCK_64X64); + decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64, + pbi->token_cache); if (pbi->do_loopfilter_inline) { - // delay the loopfilter by 1 macroblock row. const int lf_start = mi_row - MI_BLOCK_SIZE; - if (lf_start < 0) continue; + LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; - if (num_threads > 1) { - LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; + // delay the loopfilter by 1 macroblock row. + if (lf_start < 0) continue; - // decoding has completed: finish up the loop filter in this thread. - if (mi_row + MI_BLOCK_SIZE >= cm->cur_tile_mi_row_end) continue; + // decoding has completed: finish up the loop filter in this thread. + if (mi_row + MI_BLOCK_SIZE >= tile->mi_row_end) continue; - vp9_worker_sync(&pbi->lf_worker); - lf_data->start = lf_start; - lf_data->stop = mi_row; - pbi->lf_worker.hook = vp9_loop_filter_worker; + vp9_worker_sync(&pbi->lf_worker); + lf_data->start = lf_start; + lf_data->stop = mi_row; + if (num_threads > 1) { vp9_worker_launch(&pbi->lf_worker); } else { - vp9_loop_filter_rows(fb, cm, &pbi->mb, lf_start, mi_row, 0); + vp9_worker_execute(&pbi->lf_worker); } } } if (pbi->do_loopfilter_inline) { - int lf_start; - if (num_threads > 1) { - LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; + LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; - vp9_worker_sync(&pbi->lf_worker); - lf_start = lf_data->stop; - } else { - lf_start = mi_row - MI_BLOCK_SIZE; - } - vp9_loop_filter_rows(fb, cm, &pbi->mb, - lf_start, cm->mi_rows, 0); + vp9_worker_sync(&pbi->lf_worker); + lf_data->start = lf_data->stop; + lf_data->stop = cm->mi_rows; + vp9_worker_execute(&pbi->lf_worker); } } @@ -659,10 +853,32 @@ static void setup_tile_info(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { cm->log2_tile_rows += vp9_rb_read_bit(rb); } +// Reads the next tile returning its size and adjusting '*data' accordingly +// based on 'is_last'. +static size_t get_tile(const uint8_t *const data_end, + int is_last, + struct vpx_internal_error_info *error_info, + const uint8_t **data) { + size_t size; + + if (!is_last) { + if (!read_is_valid(*data, 4, data_end)) + vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt tile length"); + + size = read_be32(*data); + *data += 4; + } else { + size = data_end - *data; + } + return size; +} + static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { vp9_reader residual_bc; VP9_COMMON *const cm = &pbi->common; + MACROBLOCKD *const xd = &pbi->mb; const uint8_t *const data_end = pbi->source + pbi->source_sz; const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); @@ -672,70 +888,57 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. - vpx_memset(cm->above_context[0], 0, - sizeof(ENTROPY_CONTEXT) * MAX_MB_PLANE * (2 * aligned_mi_cols)); + vpx_memset(pbi->above_context[0], 0, + sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * + 2 * aligned_mi_cols); - vpx_memset(cm->above_seg_context, 0, - sizeof(PARTITION_CONTEXT) * aligned_mi_cols); + vpx_memset(pbi->above_seg_context, 0, + sizeof(*pbi->above_seg_context) * aligned_mi_cols); if (pbi->oxcf.inv_tile_order) { const uint8_t *data_ptr2[4][1 << 6]; vp9_reader bc_bak = {0}; - // pre-initialize the offsets, we're going to read in inverse order + // pre-initialize the offsets, we're going to decode in inverse order data_ptr2[0][0] = data; for (tile_row = 0; tile_row < tile_rows; tile_row++) { - if (tile_row) { - const int size = read_be32(data_ptr2[tile_row - 1][tile_cols - 1]); - data_ptr2[tile_row - 1][tile_cols - 1] += 4; - data_ptr2[tile_row][0] = data_ptr2[tile_row - 1][tile_cols - 1] + size; - } - - for (tile_col = 1; tile_col < tile_cols; tile_col++) { - const int size = read_be32(data_ptr2[tile_row][tile_col - 1]); - data_ptr2[tile_row][tile_col - 1] += 4; - data_ptr2[tile_row][tile_col] = - data_ptr2[tile_row][tile_col - 1] + size; + for (tile_col = 0; tile_col < tile_cols; tile_col++) { + const int last_tile = + tile_row == tile_rows - 1 && tile_col == tile_cols - 1; + const size_t size = get_tile(data_end, last_tile, &cm->error, &data); + data_ptr2[tile_row][tile_col] = data; + data += size; } } for (tile_row = 0; tile_row < tile_rows; tile_row++) { - vp9_get_tile_row_offsets(cm, tile_row); for (tile_col = tile_cols - 1; tile_col >= 0; tile_col--) { - vp9_get_tile_col_offsets(cm, tile_col); - setup_token_decoder(pbi, data_ptr2[tile_row][tile_col], + TileInfo tile; + + vp9_tile_init(&tile, cm, tile_row, tile_col); + setup_token_decoder(data_ptr2[tile_row][tile_col], data_end, data_end - data_ptr2[tile_row][tile_col], - &residual_bc); - decode_tile(pbi, &residual_bc); + &cm->error, &residual_bc); + setup_tile_context(pbi, xd, tile_col); + decode_tile(pbi, &tile, &residual_bc); if (tile_row == tile_rows - 1 && tile_col == tile_cols - 1) bc_bak = residual_bc; } } residual_bc = bc_bak; } else { - int has_more; - for (tile_row = 0; tile_row < tile_rows; tile_row++) { - vp9_get_tile_row_offsets(cm, tile_row); for (tile_col = 0; tile_col < tile_cols; tile_col++) { - size_t size; - - vp9_get_tile_col_offsets(cm, tile_col); + const int last_tile = + tile_row == tile_rows - 1 && tile_col == tile_cols - 1; + const size_t size = get_tile(data_end, last_tile, &cm->error, &data); + TileInfo tile; - has_more = tile_col < tile_cols - 1 || tile_row < tile_rows - 1; - if (has_more) { - if (!read_is_valid(data, 4, data_end)) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt tile length"); + vp9_tile_init(&tile, cm, tile_row, tile_col); - size = read_be32(data); - data += 4; - } else { - size = data_end - data; - } - - setup_token_decoder(pbi, data, size, &residual_bc); - decode_tile(pbi, &residual_bc); + setup_token_decoder(data, data_end, size, &cm->error, &residual_bc); + setup_tile_context(pbi, xd, tile_col); + decode_tile(pbi, &tile, &residual_bc); data += size; } } @@ -744,10 +947,113 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { return vp9_reader_find_end(&residual_bc); } +static int tile_worker_hook(void *arg1, void *arg2) { + TileWorkerData *tile_data = (TileWorkerData*)arg1; + const TileInfo *const tile = (TileInfo*)arg2; + int mi_row, mi_col; + + for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; + mi_row += MI_BLOCK_SIZE) { + vp9_zero(tile_data->xd.left_context); + vp9_zero(tile_data->xd.left_seg_context); + for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; + mi_col += MI_BLOCK_SIZE) { + decode_modes_sb(tile_data->cm, &tile_data->xd, tile, + mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64, + tile_data->token_cache); + } + } + return !tile_data->xd.corrupted; +} + +static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { + VP9_COMMON *const cm = &pbi->common; + const uint8_t *const data_end = pbi->source + pbi->source_sz; + const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int tile_cols = 1 << cm->log2_tile_cols; + const int tile_rows = 1 << cm->log2_tile_rows; + const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols); + int tile_col = 0; + + assert(tile_rows == 1); + (void)tile_rows; + + if (num_workers > pbi->num_tile_workers) { + int i; + CHECK_MEM_ERROR(cm, pbi->tile_workers, + vpx_realloc(pbi->tile_workers, + num_workers * sizeof(*pbi->tile_workers))); + for (i = pbi->num_tile_workers; i < num_workers; ++i) { + VP9Worker *const worker = &pbi->tile_workers[i]; + ++pbi->num_tile_workers; + + vp9_worker_init(worker); + worker->hook = (VP9WorkerHook)tile_worker_hook; + CHECK_MEM_ERROR(cm, worker->data1, + vpx_memalign(32, sizeof(TileWorkerData))); + CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo))); + if (i < num_workers - 1 && !vp9_worker_reset(worker)) { + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Tile decoder thread creation failed"); + } + } + } + + // Note: this memset assumes above_context[0], [1] and [2] + // are allocated as part of the same buffer. + vpx_memset(pbi->above_context[0], 0, + sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * + 2 * aligned_mi_cols); + vpx_memset(pbi->above_seg_context, 0, + sizeof(*pbi->above_seg_context) * aligned_mi_cols); + + while (tile_col < tile_cols) { + int i; + for (i = 0; i < num_workers && tile_col < tile_cols; ++i) { + VP9Worker *const worker = &pbi->tile_workers[i]; + TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; + TileInfo *const tile = (TileInfo*)worker->data2; + const size_t size = + get_tile(data_end, tile_col == tile_cols - 1, &cm->error, &data); + + tile_data->cm = cm; + tile_data->xd = pbi->mb; + tile_data->xd.corrupted = 0; + vp9_tile_init(tile, tile_data->cm, 0, tile_col); + + setup_token_decoder(data, data_end, size, &cm->error, + &tile_data->bit_reader); + setup_tile_context(pbi, &tile_data->xd, tile_col); + + worker->had_error = 0; + if (i == num_workers - 1 || tile_col == tile_cols - 1) { + vp9_worker_execute(worker); + } else { + vp9_worker_launch(worker); + } + + data += size; + ++tile_col; + } + + for (; i > 0; --i) { + VP9Worker *const worker = &pbi->tile_workers[i - 1]; + pbi->mb.corrupted |= !vp9_worker_sync(worker); + } + } + + { + const int final_worker = (tile_cols + num_workers - 1) % num_workers; + TileWorkerData *const tile_data = + (TileWorkerData*)pbi->tile_workers[final_worker].data1; + return vp9_reader_find_end(&tile_data->bit_reader); + } +} + static void check_sync_code(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { - if (vp9_rb_read_literal(rb, 8) != SYNC_CODE_0 || - vp9_rb_read_literal(rb, 8) != SYNC_CODE_1 || - vp9_rb_read_literal(rb, 8) != SYNC_CODE_2) { + if (vp9_rb_read_literal(rb, 8) != VP9_SYNC_CODE_0 || + vp9_rb_read_literal(rb, 8) != VP9_SYNC_CODE_1 || + vp9_rb_read_literal(rb, 8) != VP9_SYNC_CODE_2) { vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid frame sync code"); } @@ -758,34 +1064,6 @@ static void error_handler(void *data, size_t bit_offset) { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); } -static void setup_inter_inter(VP9_COMMON *cm) { - int i; - - cm->allow_comp_inter_inter = 0; - for (i = 1; i < ALLOWED_REFS_PER_FRAME; ++i) - cm->allow_comp_inter_inter |= - cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]; - - if (cm->allow_comp_inter_inter) { - // which one is always-on in comp inter-inter? - if (cm->ref_frame_sign_bias[LAST_FRAME] == - cm->ref_frame_sign_bias[GOLDEN_FRAME]) { - cm->comp_fixed_ref = ALTREF_FRAME; - cm->comp_var_ref[0] = LAST_FRAME; - cm->comp_var_ref[1] = GOLDEN_FRAME; - } else if (cm->ref_frame_sign_bias[LAST_FRAME] == - cm->ref_frame_sign_bias[ALTREF_FRAME]) { - cm->comp_fixed_ref = GOLDEN_FRAME; - cm->comp_var_ref[0] = LAST_FRAME; - cm->comp_var_ref[1] = ALTREF_FRAME; - } else { - cm->comp_fixed_ref = LAST_FRAME; - cm->comp_var_ref[0] = GOLDEN_FRAME; - cm->comp_var_ref[1] = ALTREF_FRAME; - } - } -} - #define RESERVED \ if (vp9_rb_read_bit(rb)) \ vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, \ @@ -794,12 +1072,12 @@ static void setup_inter_inter(VP9_COMMON *cm) { static size_t read_uncompressed_header(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) { VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; + size_t sz; int i; cm->last_frame_type = cm->frame_type; - if (vp9_rb_read_literal(rb, 2) != 0x2) + if (vp9_rb_read_literal(rb, 2) != VP9_FRAME_MARKER) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid frame marker"); @@ -820,12 +1098,10 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, cm->error_resilient_mode = vp9_rb_read_bit(rb); if (cm->frame_type == KEY_FRAME) { - int csp; - check_sync_code(cm, rb); - csp = vp9_rb_read_literal(rb, 3); // colorspace - if (csp != 7) { // != sRGB + cm->color_space = vp9_rb_read_literal(rb, 3); // colorspace + if (cm->color_space != SRGB) { vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range if (cm->version == 1) { cm->subsampling_x = vp9_rb_read_bit(rb); @@ -872,13 +1148,11 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, setup_frame_size_with_refs(pbi, rb); - xd->allow_high_precision_mv = vp9_rb_read_bit(rb); + cm->allow_high_precision_mv = vp9_rb_read_bit(rb); cm->mcomp_filter_type = read_interp_filter_type(rb); for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) vp9_setup_scale_factors(cm, i); - - setup_inter_inter(cm); } } @@ -890,25 +1164,34 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, cm->frame_parallel_decoding_mode = 1; } + // This flag will be overridden by the call to vp9_setup_past_independence + // below, forcing the use of context 0 for those frame types. cm->frame_context_idx = vp9_rb_read_literal(rb, NUM_FRAME_CONTEXTS_LOG2); - if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode || cm->intra_only) + if (frame_is_intra_only(cm) || cm->error_resilient_mode) vp9_setup_past_independence(cm); setup_loopfilter(&cm->lf, rb); - setup_quantization(pbi, rb); + setup_quantization(cm, &pbi->mb, rb); setup_segmentation(&cm->seg, rb); setup_tile_info(cm, rb); + sz = vp9_rb_read_literal(rb, 16); + + if (sz == 0) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Invalid header size"); - return vp9_rb_read_literal(rb, 16); + return sz; } static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data, size_t partition_size) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; + FRAME_CONTEXT *const fc = &cm->fc; vp9_reader r; + int k; if (vp9_reader_init(&r, data, partition_size)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, @@ -916,10 +1199,36 @@ static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data, cm->tx_mode = xd->lossless ? ONLY_4X4 : read_tx_mode(&r); if (cm->tx_mode == TX_MODE_SELECT) - read_tx_probs(&cm->fc.tx_probs, &r); - read_coef_probs(&cm->fc, cm->tx_mode, &r); + read_tx_probs(&fc->tx_probs, &r); + read_coef_probs(fc, cm->tx_mode, &r); + + for (k = 0; k < MBSKIP_CONTEXTS; ++k) + vp9_diff_update_prob(&r, &fc->mbskip_probs[k]); + + if (!frame_is_intra_only(cm)) { + nmv_context *const nmvc = &fc->nmvc; + int i, j; + + read_inter_mode_probs(fc, &r); + + if (cm->mcomp_filter_type == SWITCHABLE) + read_switchable_interp_probs(fc, &r); + + for (i = 0; i < INTRA_INTER_CONTEXTS; i++) + vp9_diff_update_prob(&r, &fc->intra_inter_prob[i]); + + read_comp_pred(cm, &r); + + for (j = 0; j < BLOCK_SIZE_GROUPS; j++) + for (i = 0; i < INTRA_MODES - 1; ++i) + vp9_diff_update_prob(&r, &fc->y_mode_prob[j][i]); + + for (j = 0; j < PARTITION_CONTEXTS; ++j) + for (i = 0; i < PARTITION_TYPES - 1; ++i) + vp9_diff_update_prob(&r, &fc->partition_prob[j][i]); - vp9_prepare_read_mode_info(pbi, &r); + read_mv_probs(&r, nmvc, cm->allow_high_precision_mv); + } return vp9_reader_has_error(&r); } @@ -936,59 +1245,109 @@ void vp9_init_dequantizer(VP9_COMMON *cm) { } } +#ifdef NDEBUG +#define debug_check_frame_counts(cm) (void)0 +#else // !NDEBUG +// Counts should only be incremented when frame_parallel_decoding_mode and +// error_resilient_mode are disabled. +static void debug_check_frame_counts(const VP9_COMMON *const cm) { + FRAME_COUNTS zero_counts; + vp9_zero(zero_counts); + assert(cm->frame_parallel_decoding_mode || cm->error_resilient_mode); + assert(!memcmp(cm->counts.y_mode, zero_counts.y_mode, + sizeof(cm->counts.y_mode))); + assert(!memcmp(cm->counts.uv_mode, zero_counts.uv_mode, + sizeof(cm->counts.uv_mode))); + assert(!memcmp(cm->counts.partition, zero_counts.partition, + sizeof(cm->counts.partition))); + assert(!memcmp(cm->counts.coef, zero_counts.coef, + sizeof(cm->counts.coef))); + assert(!memcmp(cm->counts.eob_branch, zero_counts.eob_branch, + sizeof(cm->counts.eob_branch))); + assert(!memcmp(cm->counts.switchable_interp, zero_counts.switchable_interp, + sizeof(cm->counts.switchable_interp))); + assert(!memcmp(cm->counts.inter_mode, zero_counts.inter_mode, + sizeof(cm->counts.inter_mode))); + assert(!memcmp(cm->counts.intra_inter, zero_counts.intra_inter, + sizeof(cm->counts.intra_inter))); + assert(!memcmp(cm->counts.comp_inter, zero_counts.comp_inter, + sizeof(cm->counts.comp_inter))); + assert(!memcmp(cm->counts.single_ref, zero_counts.single_ref, + sizeof(cm->counts.single_ref))); + assert(!memcmp(cm->counts.comp_ref, zero_counts.comp_ref, + sizeof(cm->counts.comp_ref))); + assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx))); + assert(!memcmp(cm->counts.mbskip, zero_counts.mbskip, + sizeof(cm->counts.mbskip))); + assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv))); +} +#endif // NDEBUG + int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { int i; VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; const uint8_t *data = pbi->source; - const uint8_t *data_end = pbi->source + pbi->source_sz; + const uint8_t *const data_end = pbi->source + pbi->source_sz; - struct vp9_read_bit_buffer rb = { data, data_end, 0, - cm, error_handler }; + struct vp9_read_bit_buffer rb = { data, data_end, 0, cm, error_handler }; const size_t first_partition_size = read_uncompressed_header(pbi, &rb); const int keyframe = cm->frame_type == KEY_FRAME; - YV12_BUFFER_CONFIG *new_fb = &cm->yv12_fb[cm->new_fb_idx]; + const int tile_rows = 1 << cm->log2_tile_rows; + const int tile_cols = 1 << cm->log2_tile_cols; + YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); if (!first_partition_size) { - // showing a frame directly - *p_data_end = data + 1; - return 0; + // showing a frame directly + *p_data_end = data + 1; + return 0; } - data += vp9_rb_bytes_read(&rb); - xd->corrupted = 0; - new_fb->corrupted = 0; - pbi->do_loopfilter_inline = - (cm->log2_tile_rows | cm->log2_tile_cols) == 0 && cm->lf.filter_level; if (!pbi->decoded_key_frame && !keyframe) return -1; + data += vp9_rb_bytes_read(&rb); if (!read_is_valid(data, first_partition_size, data_end)) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt header length"); - setup_plane_dequants(cm, &pbi->mb, cm->base_qindex); + pbi->do_loopfilter_inline = + (cm->log2_tile_rows | cm->log2_tile_cols) == 0 && cm->lf.filter_level; + if (pbi->do_loopfilter_inline && pbi->lf_worker.data1 == NULL) { + CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, vpx_malloc(sizeof(LFWorkerData))); + pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; + if (pbi->oxcf.max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) { + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Loop filter thread creation failed"); + } + } + + alloc_tile_storage(pbi, tile_cols); xd->mi_8x8 = cm->mi_grid_visible; - xd->mic_stream_ptr = cm->mi; xd->mode_info_stride = cm->mode_info_stride; + set_prev_mi(cm); - cm->fc = cm->frame_contexts[cm->frame_context_idx]; - - vp9_zero(cm->counts); - - new_fb->corrupted |= read_compressed_header(pbi, data, first_partition_size); - + setup_plane_dequants(cm, xd, cm->base_qindex); setup_block_dptrs(xd, cm->subsampling_x, cm->subsampling_y); - // clear out the coeff buffer + cm->fc = cm->frame_contexts[cm->frame_context_idx]; + vp9_zero(cm->counts); for (i = 0; i < MAX_MB_PLANE; ++i) - vp9_zero(xd->plane[i].qcoeff); + vp9_zero(xd->plane[i].dqcoeff); - set_prev_mi(cm); + xd->corrupted = 0; + new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); - *p_data_end = decode_tiles(pbi, data + first_partition_size); + // TODO(jzern): remove frame_parallel_decoding_mode restriction for + // single-frame tile decoding. + if (pbi->oxcf.max_threads > 1 && tile_rows == 1 && tile_cols > 1 && + cm->frame_parallel_decoding_mode) { + *p_data_end = decode_tiles_mt(pbi, data + first_partition_size); + } else { + *p_data_end = decode_tiles(pbi, data + first_partition_size); + } cm->last_width = cm->width; cm->last_height = cm->height; @@ -1006,10 +1365,12 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) { vp9_adapt_coef_probs(cm); - if (!keyframe && !cm->intra_only) { + if (!frame_is_intra_only(cm)) { vp9_adapt_mode_probs(cm); - vp9_adapt_mv_probs(cm, xd->allow_high_precision_mv); + vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); } + } else { + debug_check_frame_counts(cm); } if (cm->refresh_frame_context) diff --git a/libvpx/vp9/decoder/vp9_detokenize.c b/libvpx/vp9/decoder/vp9_detokenize.c index cd74a0b..b8d670b 100644 --- a/libvpx/vp9/decoder/vp9_detokenize.c +++ b/libvpx/vp9/decoder/vp9_detokenize.c @@ -61,53 +61,55 @@ static const vp9_prob cat6_prob[15] = { 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; -DECLARE_ALIGNED(16, extern const uint8_t, - vp9_pt_energy_class[MAX_ENTROPY_TOKENS]); -#define INCREMENT_COUNT(token) \ - do { \ - coef_counts[type][ref][band][pt] \ - [token >= TWO_TOKEN ? \ - (token == DCT_EOB_TOKEN ? DCT_EOB_MODEL_TOKEN : TWO_TOKEN) : \ - token]++; \ - token_cache[scan[c]] = vp9_pt_energy_class[token]; \ - } while (0) +static const int token_to_counttoken[MAX_ENTROPY_TOKENS] = { + ZERO_TOKEN, ONE_TOKEN, TWO_TOKEN, TWO_TOKEN, + TWO_TOKEN, TWO_TOKEN, TWO_TOKEN, TWO_TOKEN, + TWO_TOKEN, TWO_TOKEN, TWO_TOKEN, DCT_EOB_MODEL_TOKEN +}; + +#define INCREMENT_COUNT(token) \ + do { \ + if (!cm->frame_parallel_decoding_mode) { \ + ++coef_counts[band][pt][token_to_counttoken[token]]; \ + } \ + } while (0); #define WRITE_COEF_CONTINUE(val, token) \ { \ - qcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(r, val) * \ + dqcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(r, val) * \ dq[c > 0] / (1 + (tx_size == TX_32X32)); \ INCREMENT_COUNT(token); \ + token_cache[scan[c]] = vp9_pt_energy_class[token]; \ c++; \ continue; \ } -#define ADJUST_COEF(prob, bits_count) \ - do { \ - if (vp9_read(r, prob)) \ - val += 1 << bits_count; \ +#define ADJUST_COEF(prob, bits_count) \ + do { \ + val += (vp9_read(r, prob) << bits_count); \ } while (0); static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, vp9_reader *r, int block_idx, - PLANE_TYPE type, int seg_eob, int16_t *qcoeff_ptr, - TX_SIZE tx_size, const int16_t *dq, - ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L) { - FRAME_CONTEXT *const fc = &cm->fc; + PLANE_TYPE type, int seg_eob, int16_t *dqcoeff_ptr, + TX_SIZE tx_size, const int16_t *dq, int pt, + uint8_t *token_cache) { + const FRAME_CONTEXT *const fc = &cm->fc; FRAME_COUNTS *const counts = &cm->counts; - const int ref = is_inter_block(&xd->this_mi->mbmi); + const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); int band, c = 0; - vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES] = + const vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES] = fc->coef_probs[tx_size][type][ref]; vp9_prob coef_probs_full[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; uint8_t load_map[COEF_BANDS][PREV_COEF_CONTEXTS] = { { 0 } }; - vp9_prob *prob; - vp9_coeff_count_model *coef_counts = counts->coef[tx_size]; + const vp9_prob *prob; + unsigned int (*coef_counts)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES + 1] = + counts->coef[tx_size][type][ref]; + unsigned int (*eob_branch_count)[PREV_COEF_CONTEXTS] = + counts->eob_branch[tx_size][type][ref]; const int16_t *scan, *nb; - const uint8_t *band_translate; - uint8_t token_cache[1024]; - int pt = get_entropy_context(xd, tx_size, type, block_idx, A, L, - &scan, &band_translate); - nb = vp9_get_coef_neighbors_handle(scan); + const uint8_t *const band_translate = get_band_translate(tx_size); + get_scan(xd, tx_size, type, block_idx, &scan, &nb); while (1) { int val; @@ -118,11 +120,12 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, pt = get_coef_context(nb, token_cache, c); band = get_coef_band(band_translate, c); prob = coef_probs[band][pt]; - counts->eob_branch[tx_size][type][ref][band][pt]++; + if (!cm->frame_parallel_decoding_mode) + ++eob_branch_count[band][pt]; if (!vp9_read(r, prob[EOB_CONTEXT_NODE])) break; -SKIP_START: + SKIP_START: if (c >= seg_eob) break; if (c) @@ -132,6 +135,7 @@ SKIP_START: if (!vp9_read(r, prob[ZERO_CONTEXT_NODE])) { INCREMENT_COUNT(ZERO_TOKEN); + token_cache[scan[c]] = vp9_pt_energy_class[ZERO_TOKEN]; ++c; goto SKIP_START; } @@ -203,47 +207,34 @@ SKIP_START: WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY6); } - if (c < seg_eob) - coef_counts[type][ref][band][pt][DCT_EOB_MODEL_TOKEN]++; - + if (c < seg_eob) { + if (!cm->frame_parallel_decoding_mode) + ++coef_counts[band][pt][DCT_EOB_MODEL_TOKEN]; + } return c; } -struct decode_block_args { - VP9D_COMP *pbi; - vp9_reader *r; - int *eobtotal; -}; - -static void decode_block(int plane, int block, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *argv) { - const struct decode_block_args* const arg = argv; - - // find the maximum eob for this transform size, adjusted by segment - MACROBLOCKD *xd = &arg->pbi->mb; - struct segmentation *seg = &arg->pbi->common.seg; - struct macroblockd_plane* pd = &xd->plane[plane]; - const int segment_id = xd->this_mi->mbmi.segment_id; - const int seg_eob = get_tx_eob(seg, segment_id, tx_size); - int aoff, loff, eob; - +int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd, + int plane, int block, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, vp9_reader *r, + uint8_t *token_cache) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + const int seg_eob = get_tx_eob(&cm->seg, xd->mi_8x8[0]->mbmi.segment_id, + tx_size); + int aoff, loff, eob, pt; txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff); + pt = get_entropy_context(tx_size, pd->above_context + aoff, + pd->left_context + loff); - eob = decode_coefs(&arg->pbi->common, xd, arg->r, block, - pd->plane_type, seg_eob, BLOCK_OFFSET(pd->qcoeff, block), - tx_size, pd->dequant, - pd->above_context + aoff, pd->left_context + loff); + eob = decode_coefs(cm, xd, r, block, + pd->plane_type, seg_eob, BLOCK_OFFSET(pd->dqcoeff, block), + tx_size, pd->dequant, pt, token_cache); set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, aoff, loff); pd->eobs[block] = eob; - *arg->eobtotal += eob; + return eob; } -int vp9_decode_tokens(VP9D_COMP *pbi, vp9_reader *r, BLOCK_SIZE bsize) { - int eobtotal = 0; - struct decode_block_args args = {pbi, r, &eobtotal}; - foreach_transformed_block(&pbi->mb, bsize, decode_block, &args); - return eobtotal; -} + diff --git a/libvpx/vp9/decoder/vp9_detokenize.h b/libvpx/vp9/decoder/vp9_detokenize.h index cf07c56..04939ea 100644 --- a/libvpx/vp9/decoder/vp9_detokenize.h +++ b/libvpx/vp9/decoder/vp9_detokenize.h @@ -15,6 +15,9 @@ #include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/decoder/vp9_dboolhuff.h" -int vp9_decode_tokens(VP9D_COMP* pbi, vp9_reader *r, BLOCK_SIZE bsize); +int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd, + int plane, int block, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, vp9_reader *r, + uint8_t *token_cache); #endif // VP9_DECODER_VP9_DETOKENIZE_H_ diff --git a/libvpx/vp9/decoder/vp9_dsubexp.c b/libvpx/vp9/decoder/vp9_dsubexp.c index 8cc64f7..fcca017 100644 --- a/libvpx/vp9/decoder/vp9_dsubexp.c +++ b/libvpx/vp9/decoder/vp9_dsubexp.c @@ -48,8 +48,6 @@ static int merge_index(int v, int n, int modulus) { static int inv_remap_prob(int v, int m) { static int inv_map_table[MAX_PROB - 1] = { - // generated by: - // inv_map_table[j] = merge_index(j, MAX_PROB - 1, MODULUS_PARAM); 6, 19, 32, 45, 58, 71, 84, 97, 110, 123, 136, 149, 162, 175, 188, 201, 214, 227, 240, 253, 0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26, @@ -66,10 +64,11 @@ static int inv_remap_prob(int v, int m) { 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, - 238, 239, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, - + 238, 239, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252 }; - // v = merge_index(v, MAX_PROBS - 1, MODULUS_PARAM); + // The clamp is not necessary for conforming VP9 stream, it is added to + // prevent out of bound access for bad input data + v = clamp(v, 0, 253); v = inv_map_table[v]; m--; if ((m << 1) <= MAX_PROB) { @@ -101,6 +100,8 @@ static int decode_term_subexp(vp9_reader *r, int k, int num_syms) { } void vp9_diff_update_prob(vp9_reader *r, vp9_prob* p) { - int delp = decode_term_subexp(r, SUBEXP_PARAM, 255); - *p = (vp9_prob)inv_remap_prob(delp, *p); + if (vp9_read(r, DIFF_UPDATE_PROB)) { + const int delp = decode_term_subexp(r, SUBEXP_PARAM, 255); + *p = (vp9_prob)inv_remap_prob(delp, *p); + } } diff --git a/libvpx/vp9/decoder/vp9_idct_blk.c b/libvpx/vp9/decoder/vp9_idct_blk.c deleted file mode 100644 index 395e636..0000000 --- a/libvpx/vp9/decoder/vp9_idct_blk.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vp9_rtcd.h" -#include "vp9/common/vp9_blockd.h" -#include "vp9/decoder/vp9_idct_blk.h" - -static void add_constant_residual(const int16_t diff, uint8_t *dest, int stride, - int width, int height) { - int r, c; - - for (r = 0; r < height; r++) { - for (c = 0; c < width; c++) - dest[c] = clip_pixel(diff + dest[c]); - - dest += stride; - } -} - -void vp9_add_constant_residual_8x8_c(const int16_t diff, uint8_t *dest, - int stride) { - add_constant_residual(diff, dest, stride, 8, 8); -} - -void vp9_add_constant_residual_16x16_c(const int16_t diff, uint8_t *dest, - int stride) { - add_constant_residual(diff, dest, stride, 16, 16); -} - -void vp9_add_constant_residual_32x32_c(const int16_t diff, uint8_t *dest, - int stride) { - add_constant_residual(diff, dest, stride, 32, 32); -} - -void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride, - int eob) { - if (tx_type == DCT_DCT) { - vp9_idct_add(input, dest, stride, eob); - } else { - vp9_short_iht4x4_add(input, dest, stride, tx_type); - vpx_memset(input, 0, 32); - } -} - -void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, - int stride, int eob) { - if (tx_type == DCT_DCT) { - vp9_idct_add_8x8(input, dest, stride, eob); - } else { - if (eob > 0) { - vp9_short_iht8x8_add(input, dest, stride, tx_type); - vpx_memset(input, 0, 128); - } - } -} - -void vp9_idct_add_c(int16_t *input, uint8_t *dest, int stride, int eob) { - if (eob > 1) { - vp9_short_idct4x4_add(input, dest, stride); - vpx_memset(input, 0, 32); - } else { - vp9_short_idct4x4_1_add(input, dest, stride); - ((int *)input)[0] = 0; - } -} - -void vp9_idct_add_lossless_c(int16_t *input, uint8_t *dest, int stride, - int eob) { - if (eob > 1) { - vp9_short_iwalsh4x4_add(input, dest, stride); - vpx_memset(input, 0, 32); - } else { - vp9_short_iwalsh4x4_1_add_c(input, dest, stride); - ((int *)input)[0] = 0; - } -} - -void vp9_idct_add_8x8_c(int16_t *input, uint8_t *dest, int stride, int eob) { - // If dc is 1, then input[0] is the reconstructed value, do not need - // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. - - // The calculation can be simplified if there are not many non-zero dct - // coefficients. Use eobs to decide what to do. - // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. - // Combine that with code here. - if (eob) { - if (eob == 1) { - // DC only DCT coefficient - vp9_short_idct8x8_1_add(input, dest, stride); - input[0] = 0; - } else if (eob <= 10) { - vp9_short_idct10_8x8_add(input, dest, stride); - vpx_memset(input, 0, 128); - } else { - vp9_short_idct8x8_add(input, dest, stride); - vpx_memset(input, 0, 128); - } - } -} - -void vp9_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, - int stride, int eob) { - if (tx_type == DCT_DCT) { - vp9_idct_add_16x16(input, dest, stride, eob); - } else { - if (eob > 0) { - vp9_short_iht16x16_add(input, dest, stride, tx_type); - vpx_memset(input, 0, 512); - } - } -} - -void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) { - /* The calculation can be simplified if there are not many non-zero dct - * coefficients. Use eobs to separate different cases. */ - if (eob) { - if (eob == 1) { - /* DC only DCT coefficient. */ - vp9_short_idct16x16_1_add(input, dest, stride); - input[0] = 0; - } else if (eob <= 10) { - vp9_short_idct10_16x16_add(input, dest, stride); - vpx_memset(input, 0, 512); - } else { - vp9_short_idct16x16_add(input, dest, stride); - vpx_memset(input, 0, 512); - } - } -} - -void vp9_idct_add_32x32_c(int16_t *input, uint8_t *dest, int stride, int eob) { - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 1024); - - if (eob) { - if (eob == 1) { - vp9_short_idct1_32x32(input, output); - vp9_add_constant_residual_32x32(output[0], dest, stride); - input[0] = 0; - } else { - vp9_short_idct32x32_add(input, dest, stride); - vpx_memset(input, 0, 2048); - } - } -} - diff --git a/libvpx/vp9/decoder/vp9_idct_blk.h b/libvpx/vp9/decoder/vp9_idct_blk.h deleted file mode 100644 index 1810bd0..0000000 --- a/libvpx/vp9/decoder/vp9_idct_blk.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_DECODER_VP9_IDCT_BLK_H_ -#define VP9_DECODER_VP9_IDCT_BLK_H_ - -#include "vp9/common/vp9_blockd.h" - - -void vp9_idct_add_lossless_c(int16_t *input, unsigned char *dest, int stride, - int eob); - -void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, unsigned char *dest, - int stride, int eob); - -void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, unsigned char *dest, - int stride, int eob); - -void vp9_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, unsigned char *dest, - int stride, int eob); - -#endif // VP9_DECODER_VP9_IDCT_BLK_H_ diff --git a/libvpx/vp9/decoder/vp9_onyxd.h b/libvpx/vp9/decoder/vp9_onyxd.h index cd5b750..a4b9c24 100644 --- a/libvpx/vp9/decoder/vp9_onyxd.h +++ b/libvpx/vp9/decoder/vp9_onyxd.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_VP9_ONYXD_H_ -#define VP9_COMMON_VP9_ONYXD_H_ +#ifndef VP9_DECODER_VP9_ONYXD_H_ +#define VP9_DECODER_VP9_ONYXD_H_ #ifdef __cplusplus extern "C" { @@ -40,7 +40,7 @@ typedef enum { void vp9_initialize_dec(); int vp9_receive_compressed_data(VP9D_PTR comp, - uint64_t size, const uint8_t **dest, + size_t size, const uint8_t **dest, int64_t time_stamp); int vp9_get_raw_frame(VP9D_PTR comp, YV12_BUFFER_CONFIG *sd, @@ -66,4 +66,4 @@ void vp9_remove_decompressor(VP9D_PTR comp); } #endif -#endif // VP9_COMMON_VP9_ONYXD_H_ +#endif // VP9_DECODER_VP9_ONYXD_H_ diff --git a/libvpx/vp9/decoder/vp9_onyxd_if.c b/libvpx/vp9/decoder/vp9_onyxd_if.c index 17d5def..5f970a3 100644 --- a/libvpx/vp9/decoder/vp9_onyxd_if.c +++ b/libvpx/vp9/decoder/vp9_onyxd_if.c @@ -65,13 +65,12 @@ static void recon_write_yuv_frame(const char *name, #endif #if WRITE_RECON_BUFFER == 2 void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) { - // write the frame FILE *yframe; int i; char filename[255]; - sprintf(filename, "dx\\y%04d.raw", this_frame); + snprintf(filename, sizeof(filename)-1, "dx\\y%04d.raw", this_frame); yframe = fopen(filename, "wb"); for (i = 0; i < frame->y_height; i++) @@ -79,7 +78,7 @@ void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) { frame->y_width, 1, yframe); fclose(yframe); - sprintf(filename, "dx\\u%04d.raw", this_frame); + snprintf(filename, sizeof(filename)-1, "dx\\u%04d.raw", this_frame); yframe = fopen(filename, "wb"); for (i = 0; i < frame->uv_height; i++) @@ -87,7 +86,7 @@ void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) { frame->uv_width, 1, yframe); fclose(yframe); - sprintf(filename, "dx\\v%04d.raw", this_frame); + snprintf(filename, sizeof(filename)-1, "dx\\v%04d.raw", this_frame); yframe = fopen(filename, "wb"); for (i = 0; i < frame->uv_height; i++) @@ -142,20 +141,13 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) { cm->error.setjmp = 0; pbi->decoded_key_frame = 0; - if (pbi->oxcf.max_threads > 1) { - vp9_worker_init(&pbi->lf_worker); - pbi->lf_worker.data1 = vpx_malloc(sizeof(LFWorkerData)); - pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; - if (pbi->lf_worker.data1 == NULL || !vp9_worker_reset(&pbi->lf_worker)) { - vp9_remove_decompressor(pbi); - return NULL; - } - } + vp9_worker_init(&pbi->lf_worker); return pbi; } void vp9_remove_decompressor(VP9D_PTR ptr) { + int i; VP9D_COMP *const pbi = (VP9D_COMP *)ptr; if (!pbi) @@ -164,6 +156,16 @@ void vp9_remove_decompressor(VP9D_PTR ptr) { vp9_remove_common(&pbi->common); vp9_worker_end(&pbi->lf_worker); vpx_free(pbi->lf_worker.data1); + for (i = 0; i < pbi->num_tile_workers; ++i) { + VP9Worker *const worker = &pbi->tile_workers[i]; + vp9_worker_end(worker); + vpx_free(worker->data1); + vpx_free(worker->data2); + } + vpx_free(pbi->tile_workers); + vpx_free(pbi->mi_streams); + vpx_free(pbi->above_context[0]); + vpx_free(pbi->above_seg_context); vpx_free(pbi); } @@ -177,7 +179,6 @@ vpx_codec_err_t vp9_copy_reference_dec(VP9D_PTR ptr, YV12_BUFFER_CONFIG *sd) { VP9D_COMP *pbi = (VP9D_COMP *) ptr; VP9_COMMON *cm = &pbi->common; - int ref_fb_idx; /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the * encoder is using the frame buffers for. This is just a stub to keep the @@ -185,18 +186,15 @@ vpx_codec_err_t vp9_copy_reference_dec(VP9D_PTR ptr, * later commit that adds VP9-specific controls for this functionality. */ if (ref_frame_flag == VP9_LAST_FLAG) { - ref_fb_idx = cm->ref_frame_map[0]; + YV12_BUFFER_CONFIG *cfg = &cm->yv12_fb[cm->ref_frame_map[0]]; + if (!equal_dimensions(cfg, sd)) + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Incorrect buffer dimensions"); + else + vp8_yv12_copy_frame(cfg, sd); } else { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Invalid reference frame"); - return cm->error.error_code; - } - - if (!equal_dimensions(&cm->yv12_fb[ref_fb_idx], sd)) { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Incorrect buffer dimensions"); - } else { - vp8_yv12_copy_frame(&cm->yv12_fb[ref_fb_idx], sd); } return cm->error.error_code; @@ -214,13 +212,13 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_PTR ptr, VP9_REFFRAME ref_frame_flag, * vpxenc --test-decode functionality working, and will be replaced in a * later commit that adds VP9-specific controls for this functionality. */ - if (ref_frame_flag == VP9_LAST_FLAG) + if (ref_frame_flag == VP9_LAST_FLAG) { ref_fb_ptr = &pbi->common.active_ref_idx[0]; - else if (ref_frame_flag == VP9_GOLD_FLAG) + } else if (ref_frame_flag == VP9_GOLD_FLAG) { ref_fb_ptr = &pbi->common.active_ref_idx[1]; - else if (ref_frame_flag == VP9_ALT_FLAG) + } else if (ref_frame_flag == VP9_ALT_FLAG) { ref_fb_ptr = &pbi->common.active_ref_idx[2]; - else { + } else { vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, "Invalid reference frame"); return pbi->common.error.error_code; @@ -268,7 +266,7 @@ static void swap_frame_buffers(VP9D_COMP *pbi) { ++ref_index; } - cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx]; + cm->frame_to_show = get_frame_new_buffer(cm); cm->fb_idx_ref_cnt[cm->new_fb_idx]--; // Invalidate these references until the next frame starts. @@ -277,7 +275,7 @@ static void swap_frame_buffers(VP9D_COMP *pbi) { } int vp9_receive_compressed_data(VP9D_PTR ptr, - uint64_t size, const uint8_t **psource, + size_t size, const uint8_t **psource, int64_t time_stamp) { VP9D_COMP *pbi = (VP9D_COMP *) ptr; VP9_COMMON *cm = &pbi->common; @@ -306,7 +304,7 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, * thing to do here. */ if (cm->active_ref_idx[0] != INT_MAX) - cm->yv12_fb[cm->active_ref_idx[0]].corrupted = 1; + get_frame_ref_buffer(cm, 0)->corrupted = 1; } cm->new_fb_idx = get_free_fb(cm); @@ -323,7 +321,7 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, * thing to do here. */ if (cm->active_ref_idx[0] != INT_MAX) - cm->yv12_fb[cm->active_ref_idx[0]].corrupted = 1; + get_frame_ref_buffer(cm, 0)->corrupted = 1; if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) cm->fb_idx_ref_cnt[cm->new_fb_idx]--; @@ -343,36 +341,33 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, return retcode; } - { - swap_frame_buffers(pbi); + swap_frame_buffers(pbi); #if WRITE_RECON_BUFFER == 2 - if (cm->show_frame) - write_dx_frame_to_file(cm->frame_to_show, - cm->current_video_frame); - else - write_dx_frame_to_file(cm->frame_to_show, - cm->current_video_frame + 1000); + if (cm->show_frame) + write_dx_frame_to_file(cm->frame_to_show, + cm->current_video_frame); + else + write_dx_frame_to_file(cm->frame_to_show, + cm->current_video_frame + 1000); #endif - if (!pbi->do_loopfilter_inline) { - /* Apply the loop filter if appropriate. */ - vp9_loop_filter_frame(cm, &pbi->mb, pbi->common.lf.filter_level, 0, 0); - } + if (!pbi->do_loopfilter_inline) { + vp9_loop_filter_frame(cm, &pbi->mb, pbi->common.lf.filter_level, 0, 0); + } #if WRITE_RECON_BUFFER == 2 - if (cm->show_frame) - write_dx_frame_to_file(cm->frame_to_show, - cm->current_video_frame + 2000); - else - write_dx_frame_to_file(cm->frame_to_show, - cm->current_video_frame + 3000); + if (cm->show_frame) + write_dx_frame_to_file(cm->frame_to_show, + cm->current_video_frame + 2000); + else + write_dx_frame_to_file(cm->frame_to_show, + cm->current_video_frame + 3000); #endif - vp9_extend_frame_inner_borders(cm->frame_to_show, - cm->subsampling_x, - cm->subsampling_y); - } + vp9_extend_frame_inner_borders(cm->frame_to_show, + cm->subsampling_x, + cm->subsampling_y); #if WRITE_RECON_BUFFER == 1 if (cm->show_frame) @@ -398,6 +393,9 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, cm->mi_grid_visible = cm->mi_grid_base + cm->mode_info_stride + 1; cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1; + pbi->mb.mi_8x8 = cm->mi_grid_visible; + pbi->mb.mi_8x8[0] = cm->mi; + cm->current_video_frame++; } diff --git a/libvpx/vp9/decoder/vp9_onyxd_int.h b/libvpx/vp9/decoder/vp9_onyxd_int.h index a051971..7c4c9db 100644 --- a/libvpx/vp9/decoder/vp9_onyxd_int.h +++ b/libvpx/vp9/decoder/vp9_onyxd_int.h @@ -25,7 +25,7 @@ typedef struct VP9Decompressor { VP9D_CONFIG oxcf; const uint8_t *source; - uint32_t source_sz; + size_t source_sz; int64_t last_time_stamp; int ready_for_new_data; @@ -39,6 +39,18 @@ typedef struct VP9Decompressor { int do_loopfilter_inline; // apply loopfilter to available rows immediately VP9Worker lf_worker; + + VP9Worker *tile_workers; + int num_tile_workers; + + /* Each tile column has its own MODE_INFO stream. This array indexes them by + tile column index. */ + MODE_INFO **mi_streams; + + ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; + PARTITION_CONTEXT *above_seg_context; + + DECLARE_ALIGNED(16, unsigned char, token_cache[1024]); } VP9D_COMP; -#endif // VP9_DECODER_VP9_TREEREADER_H_ +#endif // VP9_DECODER_VP9_ONYXD_INT_H_ diff --git a/libvpx/vp9/decoder/vp9_read_bit_buffer.h b/libvpx/vp9/decoder/vp9_read_bit_buffer.h index c7fa3aa..41a6868 100644 --- a/libvpx/vp9/decoder/vp9_read_bit_buffer.h +++ b/libvpx/vp9/decoder/vp9_read_bit_buffer.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_READ_BIT_BUFFER_ -#define VP9_READ_BIT_BUFFER_ +#ifndef VP9_DECODER_VP9_READ_BIT_BUFFER_H_ +#define VP9_DECODER_VP9_READ_BIT_BUFFER_H_ #include <limits.h> @@ -57,4 +57,4 @@ static int vp9_rb_read_signed_literal(struct vp9_read_bit_buffer *rb, return vp9_rb_read_bit(rb) ? -value : value; } -#endif // VP9_READ_BIT_BUFFER_ +#endif // VP9_DECODER_VP9_READ_BIT_BUFFER_H_ diff --git a/libvpx/vp9/decoder/vp9_thread.c b/libvpx/vp9/decoder/vp9_thread.c index dc3b681..d953e72 100644 --- a/libvpx/vp9/decoder/vp9_thread.c +++ b/libvpx/vp9/decoder/vp9_thread.c @@ -29,7 +29,7 @@ extern "C" { //------------------------------------------------------------------------------ // simplistic pthread emulation layer -#include <process.h> +#include <process.h> // NOLINT // _beginthreadex requires __stdcall #define THREADFN unsigned int __stdcall @@ -145,9 +145,7 @@ static THREADFN thread_loop(void *ptr) { // thread loop pthread_cond_wait(&worker->condition_, &worker->mutex_); } if (worker->status_ == WORK) { - if (worker->hook) { - worker->had_error |= !worker->hook(worker->data1, worker->data2); - } + vp9_worker_execute(worker); worker->status_ = OK; } else if (worker->status_ == NOT_OK) { // finish the worker done = 1; @@ -178,7 +176,7 @@ static void change_state(VP9Worker* const worker, pthread_mutex_unlock(&worker->mutex_); } -#endif +#endif // CONFIG_MULTITHREAD //------------------------------------------------------------------------------ @@ -218,12 +216,17 @@ int vp9_worker_reset(VP9Worker* const worker) { return ok; } +void vp9_worker_execute(VP9Worker* const worker) { + if (worker->hook != NULL) { + worker->had_error |= !worker->hook(worker->data1, worker->data2); + } +} + void vp9_worker_launch(VP9Worker* const worker) { #if CONFIG_MULTITHREAD change_state(worker, WORK); #else - if (worker->hook) - worker->had_error |= !worker->hook(worker->data1, worker->data2); + vp9_worker_execute(worker); #endif } diff --git a/libvpx/vp9/decoder/vp9_thread.h b/libvpx/vp9/decoder/vp9_thread.h index a8f7e04..a624f3c 100644 --- a/libvpx/vp9/decoder/vp9_thread.h +++ b/libvpx/vp9/decoder/vp9_thread.h @@ -17,7 +17,7 @@ #ifndef VP9_DECODER_VP9_THREAD_H_ #define VP9_DECODER_VP9_THREAD_H_ -#include "vpx_config.h" +#include "./vpx_config.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { @@ -27,7 +27,7 @@ extern "C" { #if defined(_WIN32) -#include <windows.h> +#include <windows.h> // NOLINT typedef HANDLE pthread_t; typedef CRITICAL_SECTION pthread_mutex_t; typedef struct { @@ -38,7 +38,7 @@ typedef struct { #else -#include <pthread.h> +#include <pthread.h> // NOLINT #endif /* _WIN32 */ #endif /* CONFIG_MULTITHREAD */ @@ -80,6 +80,11 @@ int vp9_worker_sync(VP9Worker* const worker); // hook/data1/data2 can be changed at any time before calling this function, // but not be changed afterward until the next call to vp9_worker_sync(). void vp9_worker_launch(VP9Worker* const worker); +// This function is similar to vp9_worker_launch() except that it calls the +// hook directly instead of using a thread. Convenient to bypass the thread +// mechanism while still using the VP9Worker structs. vp9_worker_sync() must +// still be called afterward (for error reporting). +void vp9_worker_execute(VP9Worker* const worker); // Kill the thread and terminate the object. To use the object again, one // must call vp9_worker_reset() again. void vp9_worker_end(VP9Worker* const worker); @@ -90,4 +95,4 @@ void vp9_worker_end(VP9Worker* const worker); } // extern "C" #endif -#endif /* VP9_DECODER_VP9_THREAD_H_ */ +#endif // VP9_DECODER_VP9_THREAD_H_ diff --git a/libvpx/vp9/decoder/vp9_treereader.h b/libvpx/vp9/decoder/vp9_treereader.h index 710cc4c..f612497 100644 --- a/libvpx/vp9/decoder/vp9_treereader.h +++ b/libvpx/vp9/decoder/vp9_treereader.h @@ -23,7 +23,8 @@ static int treed_read(vp9_reader *const r, /* !!! must return a 0 or 1 !!! */ const vp9_prob *const p) { register vp9_tree_index i = 0; - while ((i = t[ i + vp9_read(r, p[i >> 1])]) > 0); + while ((i = t[ i + vp9_read(r, p[i >> 1])]) > 0) + continue; return -i; } diff --git a/libvpx/vp9/decoder/x86/vp9_dequantize_sse2.c b/libvpx/vp9/decoder/x86/vp9_dequantize_sse2.c deleted file mode 100644 index 54ec67f..0000000 --- a/libvpx/vp9/decoder/x86/vp9_dequantize_sse2.c +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <assert.h> -#include <emmintrin.h> // SSE2 -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_idct.h" - -void vp9_add_constant_residual_8x8_sse2(const int16_t diff, uint8_t *dest, - int stride) { - uint8_t abs_diff; - __m128i d; - - // Prediction data. - __m128i p0 = _mm_loadl_epi64((const __m128i *)(dest + 0 * stride)); - __m128i p1 = _mm_loadl_epi64((const __m128i *)(dest + 1 * stride)); - __m128i p2 = _mm_loadl_epi64((const __m128i *)(dest + 2 * stride)); - __m128i p3 = _mm_loadl_epi64((const __m128i *)(dest + 3 * stride)); - __m128i p4 = _mm_loadl_epi64((const __m128i *)(dest + 4 * stride)); - __m128i p5 = _mm_loadl_epi64((const __m128i *)(dest + 5 * stride)); - __m128i p6 = _mm_loadl_epi64((const __m128i *)(dest + 6 * stride)); - __m128i p7 = _mm_loadl_epi64((const __m128i *)(dest + 7 * stride)); - - p0 = _mm_unpacklo_epi64(p0, p1); - p2 = _mm_unpacklo_epi64(p2, p3); - p4 = _mm_unpacklo_epi64(p4, p5); - p6 = _mm_unpacklo_epi64(p6, p7); - - // Clip diff value to [0, 255] range. Then, do addition or subtraction - // according to its sign. - if (diff >= 0) { - abs_diff = (diff > 255) ? 255 : diff; - d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0); - - p0 = _mm_adds_epu8(p0, d); - p2 = _mm_adds_epu8(p2, d); - p4 = _mm_adds_epu8(p4, d); - p6 = _mm_adds_epu8(p6, d); - } else { - abs_diff = (diff < -255) ? 255 : -diff; - d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0); - - p0 = _mm_subs_epu8(p0, d); - p2 = _mm_subs_epu8(p2, d); - p4 = _mm_subs_epu8(p4, d); - p6 = _mm_subs_epu8(p6, d); - } - - _mm_storel_epi64((__m128i *)(dest + 0 * stride), p0); - p0 = _mm_srli_si128(p0, 8); - _mm_storel_epi64((__m128i *)(dest + 1 * stride), p0); - - _mm_storel_epi64((__m128i *)(dest + 2 * stride), p2); - p2 = _mm_srli_si128(p2, 8); - _mm_storel_epi64((__m128i *)(dest + 3 * stride), p2); - - _mm_storel_epi64((__m128i *)(dest + 4 * stride), p4); - p4 = _mm_srli_si128(p4, 8); - _mm_storel_epi64((__m128i *)(dest + 5 * stride), p4); - - _mm_storel_epi64((__m128i *)(dest + 6 * stride), p6); - p6 = _mm_srli_si128(p6, 8); - _mm_storel_epi64((__m128i *)(dest + 7 * stride), p6); -} - -void vp9_add_constant_residual_16x16_sse2(const int16_t diff, uint8_t *dest, - int stride) { - uint8_t abs_diff; - __m128i d; - - // Prediction data. - __m128i p0 = _mm_load_si128((const __m128i *)(dest + 0 * stride)); - __m128i p1 = _mm_load_si128((const __m128i *)(dest + 1 * stride)); - __m128i p2 = _mm_load_si128((const __m128i *)(dest + 2 * stride)); - __m128i p3 = _mm_load_si128((const __m128i *)(dest + 3 * stride)); - __m128i p4 = _mm_load_si128((const __m128i *)(dest + 4 * stride)); - __m128i p5 = _mm_load_si128((const __m128i *)(dest + 5 * stride)); - __m128i p6 = _mm_load_si128((const __m128i *)(dest + 6 * stride)); - __m128i p7 = _mm_load_si128((const __m128i *)(dest + 7 * stride)); - __m128i p8 = _mm_load_si128((const __m128i *)(dest + 8 * stride)); - __m128i p9 = _mm_load_si128((const __m128i *)(dest + 9 * stride)); - __m128i p10 = _mm_load_si128((const __m128i *)(dest + 10 * stride)); - __m128i p11 = _mm_load_si128((const __m128i *)(dest + 11 * stride)); - __m128i p12 = _mm_load_si128((const __m128i *)(dest + 12 * stride)); - __m128i p13 = _mm_load_si128((const __m128i *)(dest + 13 * stride)); - __m128i p14 = _mm_load_si128((const __m128i *)(dest + 14 * stride)); - __m128i p15 = _mm_load_si128((const __m128i *)(dest + 15 * stride)); - - // Clip diff value to [0, 255] range. Then, do addition or subtraction - // according to its sign. - if (diff >= 0) { - abs_diff = (diff > 255) ? 255 : diff; - d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0); - - p0 = _mm_adds_epu8(p0, d); - p1 = _mm_adds_epu8(p1, d); - p2 = _mm_adds_epu8(p2, d); - p3 = _mm_adds_epu8(p3, d); - p4 = _mm_adds_epu8(p4, d); - p5 = _mm_adds_epu8(p5, d); - p6 = _mm_adds_epu8(p6, d); - p7 = _mm_adds_epu8(p7, d); - p8 = _mm_adds_epu8(p8, d); - p9 = _mm_adds_epu8(p9, d); - p10 = _mm_adds_epu8(p10, d); - p11 = _mm_adds_epu8(p11, d); - p12 = _mm_adds_epu8(p12, d); - p13 = _mm_adds_epu8(p13, d); - p14 = _mm_adds_epu8(p14, d); - p15 = _mm_adds_epu8(p15, d); - } else { - abs_diff = (diff < -255) ? 255 : -diff; - d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0); - - p0 = _mm_subs_epu8(p0, d); - p1 = _mm_subs_epu8(p1, d); - p2 = _mm_subs_epu8(p2, d); - p3 = _mm_subs_epu8(p3, d); - p4 = _mm_subs_epu8(p4, d); - p5 = _mm_subs_epu8(p5, d); - p6 = _mm_subs_epu8(p6, d); - p7 = _mm_subs_epu8(p7, d); - p8 = _mm_subs_epu8(p8, d); - p9 = _mm_subs_epu8(p9, d); - p10 = _mm_subs_epu8(p10, d); - p11 = _mm_subs_epu8(p11, d); - p12 = _mm_subs_epu8(p12, d); - p13 = _mm_subs_epu8(p13, d); - p14 = _mm_subs_epu8(p14, d); - p15 = _mm_subs_epu8(p15, d); - } - - // Store results - _mm_store_si128((__m128i *)(dest + 0 * stride), p0); - _mm_store_si128((__m128i *)(dest + 1 * stride), p1); - _mm_store_si128((__m128i *)(dest + 2 * stride), p2); - _mm_store_si128((__m128i *)(dest + 3 * stride), p3); - _mm_store_si128((__m128i *)(dest + 4 * stride), p4); - _mm_store_si128((__m128i *)(dest + 5 * stride), p5); - _mm_store_si128((__m128i *)(dest + 6 * stride), p6); - _mm_store_si128((__m128i *)(dest + 7 * stride), p7); - _mm_store_si128((__m128i *)(dest + 8 * stride), p8); - _mm_store_si128((__m128i *)(dest + 9 * stride), p9); - _mm_store_si128((__m128i *)(dest + 10 * stride), p10); - _mm_store_si128((__m128i *)(dest + 11 * stride), p11); - _mm_store_si128((__m128i *)(dest + 12 * stride), p12); - _mm_store_si128((__m128i *)(dest + 13 * stride), p13); - _mm_store_si128((__m128i *)(dest + 14 * stride), p14); - _mm_store_si128((__m128i *)(dest + 15 * stride), p15); -} - -void vp9_add_constant_residual_32x32_sse2(const int16_t diff, uint8_t *dest, - int stride) { - uint8_t abs_diff; - __m128i d; - int i = 8; - - if (diff >= 0) { - abs_diff = (diff > 255) ? 255 : diff; - d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0); - } else { - abs_diff = (diff < -255) ? 255 : -diff; - d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0); - } - - do { - // Prediction data. - __m128i p0 = _mm_load_si128((const __m128i *)(dest + 0 * stride)); - __m128i p1 = _mm_load_si128((const __m128i *)(dest + 0 * stride + 16)); - __m128i p2 = _mm_load_si128((const __m128i *)(dest + 1 * stride)); - __m128i p3 = _mm_load_si128((const __m128i *)(dest + 1 * stride + 16)); - __m128i p4 = _mm_load_si128((const __m128i *)(dest + 2 * stride)); - __m128i p5 = _mm_load_si128((const __m128i *)(dest + 2 * stride + 16)); - __m128i p6 = _mm_load_si128((const __m128i *)(dest + 3 * stride)); - __m128i p7 = _mm_load_si128((const __m128i *)(dest + 3 * stride + 16)); - - // Clip diff value to [0, 255] range. Then, do addition or subtraction - // according to its sign. - if (diff >= 0) { - p0 = _mm_adds_epu8(p0, d); - p1 = _mm_adds_epu8(p1, d); - p2 = _mm_adds_epu8(p2, d); - p3 = _mm_adds_epu8(p3, d); - p4 = _mm_adds_epu8(p4, d); - p5 = _mm_adds_epu8(p5, d); - p6 = _mm_adds_epu8(p6, d); - p7 = _mm_adds_epu8(p7, d); - } else { - p0 = _mm_subs_epu8(p0, d); - p1 = _mm_subs_epu8(p1, d); - p2 = _mm_subs_epu8(p2, d); - p3 = _mm_subs_epu8(p3, d); - p4 = _mm_subs_epu8(p4, d); - p5 = _mm_subs_epu8(p5, d); - p6 = _mm_subs_epu8(p6, d); - p7 = _mm_subs_epu8(p7, d); - } - - // Store results - _mm_store_si128((__m128i *)(dest + 0 * stride), p0); - _mm_store_si128((__m128i *)(dest + 0 * stride + 16), p1); - _mm_store_si128((__m128i *)(dest + 1 * stride), p2); - _mm_store_si128((__m128i *)(dest + 1 * stride + 16), p3); - _mm_store_si128((__m128i *)(dest + 2 * stride), p4); - _mm_store_si128((__m128i *)(dest + 2 * stride + 16), p5); - _mm_store_si128((__m128i *)(dest + 3 * stride), p6); - _mm_store_si128((__m128i *)(dest + 3 * stride + 16), p7); - - dest += 4 * stride; - } while (--i); -} |