diff options
author | hkuang <hkuang@google.com> | 2014-04-10 17:21:41 +0000 |
---|---|---|
committer | Android Git Automerger <android-git-automerger@android.com> | 2014-04-10 17:21:41 +0000 |
commit | 704338728f68ac5ff7f36186bc4cdff890994959 (patch) | |
tree | d4655502ab2768db0858ae72ba2de7b530fa3afd /libvpx/vp9/decoder | |
parent | 7616a7e29066d65ecd1d6f54485360d3964c67bb (diff) | |
parent | 4fb68e5dd4e93c7599dc905d861de11ac39c5585 (diff) | |
download | android_external_libvpx-704338728f68ac5ff7f36186bc4cdff890994959.tar.gz android_external_libvpx-704338728f68ac5ff7f36186bc4cdff890994959.tar.bz2 android_external_libvpx-704338728f68ac5ff7f36186bc4cdff890994959.zip |
am 4fb68e5d: Roll latest libvpx to fix hang when doing adaptive playback.
* commit '4fb68e5dd4e93c7599dc905d861de11ac39c5585':
Roll latest libvpx to fix hang when doing adaptive playback.
Diffstat (limited to 'libvpx/vp9/decoder')
-rw-r--r-- | libvpx/vp9/decoder/vp9_decodeframe.c | 228 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_decodeframe.h | 7 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_decodemv.c | 26 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_decoder.c | 130 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_decoder.h | 34 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_detokenize.c | 2 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_dthread.c | 26 | ||||
-rw-r--r-- | libvpx/vp9/decoder/vp9_dthread.h | 5 |
8 files changed, 175 insertions, 283 deletions
diff --git a/libvpx/vp9/decoder/vp9_decodeframe.c b/libvpx/vp9/decoder/vp9_decodeframe.c index 6926657..9b63961 100644 --- a/libvpx/vp9/decoder/vp9_decodeframe.c +++ b/libvpx/vp9/decoder/vp9_decodeframe.c @@ -187,41 +187,13 @@ static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) { xd->plane[i].dequant = cm->uv_dequant[q_index]; } -// Allocate storage for each tile column. -// TODO(jzern): when max_threads <= 1 the same storage could be used for each -// tile. -static void alloc_tile_storage(VP9D_COMP *pbi, int tile_rows, int tile_cols) { - VP9_COMMON *const cm = &pbi->common; - const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); - int i; - - // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm - // block where mi unit size is 8x8. - CHECK_MEM_ERROR(cm, pbi->above_context[0], - vpx_realloc(pbi->above_context[0], - sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * - 2 * aligned_mi_cols)); - for (i = 1; i < MAX_MB_PLANE; ++i) { - pbi->above_context[i] = pbi->above_context[0] + - i * sizeof(*pbi->above_context[0]) * - 2 * aligned_mi_cols; - } - - // This is sized based on the entire frame. Each tile operates within its - // column bounds. - CHECK_MEM_ERROR(cm, pbi->above_seg_context, - vpx_realloc(pbi->above_seg_context, - sizeof(*pbi->above_seg_context) * - aligned_mi_cols)); -} - static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, TX_SIZE tx_size, uint8_t *dst, int stride, int eob) { struct macroblockd_plane *const pd = &xd->plane[plane]; if (eob > 0) { TX_TYPE tx_type; - const int plane_type = pd->plane_type; + const PLANE_TYPE plane_type = pd->plane_type; int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); switch (tx_size) { case TX_4X4: @@ -269,11 +241,11 @@ struct intra_args { static void predict_and_reconstruct_intra_block(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { - struct intra_args *const args = arg; + struct intra_args *const args = (struct intra_args *)arg; VP9_COMMON *const cm = args->cm; MACROBLOCKD *const xd = args->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; const MB_PREDICTION_MODE mode = (plane == 0) ? get_y_mode(mi, block) : mi->mbmi.uv_mode; int x, y; @@ -305,7 +277,7 @@ struct inter_args { static void reconstruct_inter_block(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { - struct inter_args *args = arg; + struct inter_args *args = (struct inter_args *)arg; VP9_COMMON *const cm = args->cm; MACROBLOCKD *const xd = args->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; @@ -319,36 +291,36 @@ static void reconstruct_inter_block(int plane, int block, *args->eobtotal += eob; } -static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, - const TileInfo *const tile, - BLOCK_SIZE bsize, int mi_row, int mi_col) { +static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + BLOCK_SIZE bsize, int mi_row, int mi_col) { const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; const int x_mis = MIN(bw, cm->mi_cols - mi_col); const int y_mis = MIN(bh, cm->mi_rows - mi_row); - const int offset = mi_row * cm->mode_info_stride + mi_col; + const int offset = mi_row * cm->mi_stride + mi_col; int x, y; - xd->mi_8x8 = cm->mi_grid_visible + offset; - xd->prev_mi_8x8 = cm->prev_mi_grid_visible + offset; - xd->mi_8x8[0] = &cm->mi[offset]; - xd->mi_8x8[0]->mbmi.sb_type = bsize; + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = &cm->mi[offset]; + xd->mi[0]->mbmi.sb_type = bsize; for (y = 0; y < y_mis; ++y) for (x = !y; x < x_mis; ++x) - xd->mi_8x8[y * cm->mode_info_stride + x] = xd->mi_8x8[0]; + xd->mi[y * cm->mi_stride + x] = xd->mi[0]; - set_skip_context(xd, xd->above_context, xd->left_context, mi_row, mi_col); + set_skip_context(xd, mi_row, mi_col); // Distance of Mb to the various image edges. These are specified to 8th pel // as they are always compared to values that are in 1/8th pel units set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col); + return &xd->mi[0]->mbmi; } static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd, int idx, int mi_row, int mi_col) { - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; RefBuffer *ref_buffer = &cm->frame_refs[mbmi->ref_frame[idx] - LAST_FRAME]; xd->block_refs[idx] = ref_buffer; if (!vp9_is_valid_scale(&ref_buffer->sf)) @@ -364,17 +336,12 @@ static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vp9_reader *r, BLOCK_SIZE bsize) { const int less8x8 = bsize < BLOCK_8X8; - MB_MODE_INFO *mbmi; - - set_offsets(cm, xd, tile, bsize, mi_row, mi_col); + MB_MODE_INFO *mbmi = set_offsets(cm, xd, tile, bsize, mi_row, mi_col); vp9_read_mode_info(cm, xd, tile, mi_row, mi_col, r); if (less8x8) bsize = BLOCK_8X8; - // Has to be called after set_offsets - mbmi = &xd->mi_8x8[0]->mbmi; - if (mbmi->skip) { reset_skip_context(xd, bsize); } else { @@ -393,8 +360,6 @@ static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, if (has_second_ref(mbmi)) set_ref(cm, xd, 1, mi_row, mi_col); - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); - // Prediction vp9_dec_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); @@ -414,16 +379,14 @@ static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs, int mi_row, int mi_col, BLOCK_SIZE bsize, vp9_reader *r) { - const int ctx = partition_plane_context(xd->above_seg_context, - xd->left_seg_context, - mi_row, mi_col, bsize); + const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); const vp9_prob *const probs = get_partition_probs(cm, ctx); const int has_rows = (mi_row + hbs) < cm->mi_rows; const int has_cols = (mi_col + hbs) < cm->mi_cols; PARTITION_TYPE p; if (has_rows && has_cols) - p = vp9_read_tree(r, vp9_partition_tree, probs); + p = (PARTITION_TYPE)vp9_read_tree(r, vp9_partition_tree, probs); else if (!has_rows && has_cols) p = vp9_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ; else if (has_rows && !has_cols) @@ -481,8 +444,7 @@ static void decode_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd, // update partition context if (bsize >= BLOCK_8X8 && (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) - update_partition_context(xd->above_seg_context, xd->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } static void setup_token_decoder(const uint8_t *data, @@ -650,9 +612,7 @@ static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { read_frame_size(rb, &cm->display_width, &cm->display_height); } -static void apply_frame_size(VP9D_COMP *pbi, int width, int height) { - VP9_COMMON *cm = &pbi->common; - +static void apply_frame_size(VP9_COMMON *cm, int width, int height) { if (cm->width != width || cm->height != height) { // Change in frame size. // TODO(agrange) Don't test width/height, check overall size. @@ -679,18 +639,15 @@ static void apply_frame_size(VP9D_COMP *pbi, int width, int height) { } } -static void setup_frame_size(VP9D_COMP *pbi, - struct vp9_read_bit_buffer *rb) { +static void setup_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { int width, height; read_frame_size(rb, &width, &height); - apply_frame_size(pbi, width, height); - setup_display_size(&pbi->common, rb); + apply_frame_size(cm, width, height); + setup_display_size(cm, rb); } -static void setup_frame_size_with_refs(VP9D_COMP *pbi, +static void setup_frame_size_with_refs(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { - VP9_COMMON *const cm = &pbi->common; - int width, height; int found = 0, i; for (i = 0; i < REFS_PER_FRAME; ++i) { @@ -710,22 +667,11 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi, vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Referenced frame with invalid size"); - apply_frame_size(pbi, width, height); + apply_frame_size(cm, width, height); setup_display_size(cm, rb); } -static void setup_tile_context(VP9D_COMP *const pbi, MACROBLOCKD *const xd, - int tile_row, int tile_col) { - int i; - - for (i = 0; i < MAX_MB_PLANE; ++i) - xd->above_context[i] = pbi->above_context[i]; - - // see note in alloc_tile_storage(). - xd->above_seg_context = pbi->above_seg_context; -} - -static void decode_tile(VP9D_COMP *pbi, const TileInfo *const tile, +static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile, vp9_reader *r) { const int num_threads = pbi->oxcf.max_threads; VP9_COMMON *const cm = &pbi->common; @@ -830,15 +776,15 @@ typedef struct TileBuffer { int col; // only used with multi-threaded decoding } TileBuffer; -static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { +static const uint8_t *decode_tiles(VP9Decoder *pbi, + const uint8_t *data, + const uint8_t *data_end) { VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; TileBuffer tile_buffers[4][1 << 6]; int tile_row, tile_col; - const uint8_t *const data_end = pbi->source + pbi->source_sz; const uint8_t *end = NULL; vp9_reader r; @@ -847,11 +793,11 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. - vpx_memset(pbi->above_context[0], 0, - sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * 2 * aligned_cols); + vpx_memset(cm->above_context, 0, + sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols); - vpx_memset(pbi->above_seg_context, 0, - sizeof(*pbi->above_seg_context) * aligned_cols); + vpx_memset(cm->above_seg_context, 0, + sizeof(*cm->above_seg_context) * aligned_cols); // Load tile data into tile_buffers for (tile_row = 0; tile_row < tile_rows; ++tile_row) { @@ -878,7 +824,6 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { vp9_tile_init(&tile, cm, tile_row, col); setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &r); - setup_tile_context(pbi, xd, tile_row, col); decode_tile(pbi, &tile, &r); if (last_tile) @@ -889,17 +834,6 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { return end; } -static void setup_tile_macroblockd(TileWorkerData *const tile_data) { - MACROBLOCKD *xd = &tile_data->xd; - struct macroblockd_plane *const pd = xd->plane; - int i; - - for (i = 0; i < MAX_MB_PLANE; ++i) { - pd[i].dqcoeff = tile_data->dqcoeff[i]; - vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t)); - } -} - static int tile_worker_hook(void *arg1, void *arg2) { TileWorkerData *const tile_data = (TileWorkerData*)arg1; const TileInfo *const tile = (TileInfo*)arg2; @@ -931,10 +865,11 @@ static int compare_tile_buffers(const void *a, const void *b) { } } -static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { +static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, + const uint8_t *data, + const uint8_t *data_end) { VP9_COMMON *const cm = &pbi->common; const uint8_t *bit_reader_end = NULL; - const uint8_t *const data_end = pbi->source + pbi->source_sz; const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; @@ -947,12 +882,16 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { assert(tile_rows == 1); (void)tile_rows; - if (num_workers > pbi->num_tile_workers) { + // TODO(jzern): See if we can remove the restriction of passing in max + // threads to the decoder. + if (pbi->num_tile_workers == 0) { + const int num_threads = pbi->oxcf.max_threads & ~1; int i; + // TODO(jzern): Allocate one less worker, as in the current code we only + // use num_threads - 1 workers. CHECK_MEM_ERROR(cm, pbi->tile_workers, - vpx_realloc(pbi->tile_workers, - num_workers * sizeof(*pbi->tile_workers))); - for (i = pbi->num_tile_workers; i < num_workers; ++i) { + vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); + for (i = 0; i < num_threads; ++i) { VP9Worker *const worker = &pbi->tile_workers[i]; ++pbi->num_tile_workers; @@ -960,7 +899,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { CHECK_MEM_ERROR(cm, worker->data1, vpx_memalign(32, sizeof(TileWorkerData))); CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo))); - if (i < num_workers - 1 && !vp9_worker_reset(worker)) { + if (i < num_threads - 1 && !vp9_worker_reset(worker)) { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Tile decoder thread creation failed"); } @@ -968,17 +907,16 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { } // Reset tile decoding hook - for (n = 0; n < pbi->num_tile_workers; ++n) { + for (n = 0; n < num_workers; ++n) { pbi->tile_workers[n].hook = (VP9WorkerHook)tile_worker_hook; } // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. - vpx_memset(pbi->above_context[0], 0, - sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * - 2 * aligned_mi_cols); - vpx_memset(pbi->above_seg_context, 0, - sizeof(*pbi->above_seg_context) * aligned_mi_cols); + vpx_memset(cm->above_context, 0, + sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); + vpx_memset(cm->above_seg_context, 0, + sizeof(*cm->above_seg_context) * aligned_mi_cols); // Load tile data into tile_buffers for (n = 0; n < tile_cols; ++n) { @@ -1023,11 +961,10 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { tile_data->xd = pbi->mb; tile_data->xd.corrupted = 0; vp9_tile_init(tile, tile_data->cm, 0, buf->col); - setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &tile_data->bit_reader); - setup_tile_context(pbi, &tile_data->xd, 0, buf->col); - setup_tile_macroblockd(tile_data); + init_macroblockd(cm, &tile_data->xd); + vp9_zero(tile_data->xd.dqcoeff); worker->had_error = 0; if (i == num_workers - 1 || n == tile_cols - 1) { @@ -1072,12 +1009,13 @@ static void error_handler(void *data) { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); } -#define RESERVED \ - if (vp9_rb_read_bit(rb)) \ - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, \ - "Reserved bit must be unset") +static BITSTREAM_PROFILE read_profile(struct vp9_read_bit_buffer *rb) { + int profile = vp9_rb_read_bit(rb); + profile |= vp9_rb_read_bit(rb) << 1; + return (BITSTREAM_PROFILE) profile; +} -static size_t read_uncompressed_header(VP9D_COMP *pbi, +static size_t read_uncompressed_header(VP9Decoder *pbi, struct vp9_read_bit_buffer *rb) { VP9_COMMON *const cm = &pbi->common; size_t sz; @@ -1089,8 +1027,10 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid frame marker"); - cm->version = vp9_rb_read_bit(rb); - RESERVED; + cm->profile = read_profile(rb); + if (cm->profile >= MAX_PROFILES) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Unsupported bitstream profile"); cm->show_existing_frame = vp9_rb_read_bit(rb); if (cm->show_existing_frame) { @@ -1115,11 +1055,12 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, if (cm->frame_type == KEY_FRAME) { check_sync_code(cm, rb); - - cm->color_space = vp9_rb_read_literal(rb, 3); // colorspace + if (cm->profile > PROFILE_1) + cm->bit_depth = vp9_rb_read_bit(rb) ? BITS_12 : BITS_10; + cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3); if (cm->color_space != SRGB) { vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range - if (cm->version == 1) { + if (cm->profile >= PROFILE_1) { cm->subsampling_x = vp9_rb_read_bit(rb); cm->subsampling_y = vp9_rb_read_bit(rb); vp9_rb_read_bit(rb); // has extra plane @@ -1127,7 +1068,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, cm->subsampling_y = cm->subsampling_x = 1; } } else { - if (cm->version == 1) { + if (cm->profile >= PROFILE_1) { cm->subsampling_y = cm->subsampling_x = 0; vp9_rb_read_bit(rb); // has extra plane } else { @@ -1143,7 +1084,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, cm->frame_refs[i].buf = get_frame_new_buffer(cm); } - setup_frame_size(pbi, rb); + setup_frame_size(cm, rb); } else { cm->intra_only = cm->show_frame ? 0 : vp9_rb_read_bit(rb); @@ -1154,7 +1095,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, check_sync_code(cm, rb); pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES); - setup_frame_size(pbi, rb); + setup_frame_size(cm, rb); } else { pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES); @@ -1166,7 +1107,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb); } - setup_frame_size_with_refs(pbi, rb); + setup_frame_size_with_refs(cm, rb); cm->allow_high_precision_mv = vp9_rb_read_bit(rb); cm->interp_filter = read_interp_filter(rb); @@ -1214,7 +1155,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, return sz; } -static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data, +static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data, size_t partition_size) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; @@ -1314,14 +1255,12 @@ static void debug_check_frame_counts(const VP9_COMMON *const cm) { } #endif // NDEBUG -int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { - int i; +int vp9_decode_frame(VP9Decoder *pbi, + const uint8_t *data, const uint8_t *data_end, + const uint8_t **p_data_end) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; - const uint8_t *data = pbi->source; - const uint8_t *const data_end = pbi->source + pbi->source_sz; - struct vp9_read_bit_buffer rb = { data, data_end, 0, cm, error_handler }; const size_t first_partition_size = read_uncompressed_header(pbi, &rb); const int keyframe = cm->frame_type == KEY_FRAME; @@ -1347,7 +1286,8 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { pbi->do_loopfilter_inline = (cm->log2_tile_rows | cm->log2_tile_cols) == 0 && cm->lf.filter_level; if (pbi->do_loopfilter_inline && pbi->lf_worker.data1 == NULL) { - CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, vpx_malloc(sizeof(LFWorkerData))); + CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, + vpx_memalign(32, sizeof(LFWorkerData))); pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; if (pbi->oxcf.max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, @@ -1355,21 +1295,15 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { } } - alloc_tile_storage(pbi, tile_rows, tile_cols); - - xd->mode_info_stride = cm->mode_info_stride; - if (cm->coding_use_prev_mi) - set_prev_mi(cm); - else - cm->prev_mi = NULL; + init_macroblockd(cm, &pbi->mb); + cm->prev_mi = get_prev_mi(cm); setup_plane_dequants(cm, xd, cm->base_qindex); vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y); cm->fc = cm->frame_contexts[cm->frame_context_idx]; vp9_zero(cm->counts); - for (i = 0; i < MAX_MB_PLANE; ++i) - vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t)); + vp9_zero(xd->dqcoeff); xd->corrupted = 0; new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); @@ -1378,9 +1312,9 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { // single-frame tile decoding. if (pbi->oxcf.max_threads > 1 && tile_rows == 1 && tile_cols > 1 && cm->frame_parallel_decoding_mode) { - *p_data_end = decode_tiles_mt(pbi, data + first_partition_size); + *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); } else { - *p_data_end = decode_tiles(pbi, data + first_partition_size); + *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end); } new_fb->corrupted |= xd->corrupted; diff --git a/libvpx/vp9/decoder/vp9_decodeframe.h b/libvpx/vp9/decoder/vp9_decodeframe.h index 4537bc8..8a19daf 100644 --- a/libvpx/vp9/decoder/vp9_decodeframe.h +++ b/libvpx/vp9/decoder/vp9_decodeframe.h @@ -17,10 +17,13 @@ extern "C" { #endif struct VP9Common; -struct VP9Decompressor; +struct VP9Decoder; void vp9_init_dequantizer(struct VP9Common *cm); -int vp9_decode_frame(struct VP9Decompressor *cpi, const uint8_t **p_data_end); + +int vp9_decode_frame(struct VP9Decoder *pbi, + const uint8_t *data, const uint8_t *data_end, + const uint8_t **p_data_end); #ifdef __cplusplus } // extern "C" diff --git a/libvpx/vp9/decoder/vp9_decodemv.c b/libvpx/vp9/decoder/vp9_decodemv.c index 06a21ea..3618f12 100644 --- a/libvpx/vp9/decoder/vp9_decodemv.c +++ b/libvpx/vp9/decoder/vp9_decodemv.c @@ -63,7 +63,7 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, TX_SIZE max_tx_size, vp9_reader *r) { const int ctx = vp9_get_tx_size_context(xd); const vp9_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc.tx_probs); - TX_SIZE tx_size = (TX_SIZE)vp9_read(r, tx_probs[0]); + int tx_size = vp9_read(r, tx_probs[0]); if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) { tx_size += vp9_read(r, tx_probs[1]); if (tx_size != TX_8X8 && max_tx_size >= TX_32X32) @@ -72,7 +72,7 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, if (!cm->frame_parallel_decoding_mode) ++get_tx_counts(max_tx_size, ctx, &cm->counts.tx)[tx_size]; - return tx_size; + return (TX_SIZE)tx_size; } static TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, TX_MODE tx_mode, @@ -104,7 +104,7 @@ static int read_intra_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vp9_reader *r) { struct segmentation *const seg = &cm->seg; - const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type; + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; int segment_id; if (!seg->enabled) @@ -121,7 +121,7 @@ static int read_intra_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vp9_reader *r) { struct segmentation *const seg = &cm->seg; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const BLOCK_SIZE bsize = mbmi->sb_type; int predicted_segment_id, segment_id; @@ -161,10 +161,10 @@ static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd, static void read_intra_frame_mode_info(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vp9_reader *r) { - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; - const MODE_INFO *above_mi = xd->mi_8x8[-cm->mode_info_stride]; - const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL; + const MODE_INFO *above_mi = xd->mi[-cm->mi_stride]; + const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL; const BLOCK_SIZE bsize = mbmi->sb_type; int i; @@ -237,14 +237,15 @@ static int read_mv_component(vp9_reader *r, static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref, const nmv_context *ctx, nmv_context_counts *counts, int allow_hp) { - const MV_JOINT_TYPE j = vp9_read_tree(r, vp9_mv_joint_tree, ctx->joints); + const MV_JOINT_TYPE joint_type = + (MV_JOINT_TYPE)vp9_read_tree(r, vp9_mv_joint_tree, ctx->joints); const int use_hp = allow_hp && vp9_use_mv_hp(ref); MV diff = {0, 0}; - if (mv_joint_vertical(j)) + if (mv_joint_vertical(joint_type)) diff.row = read_mv_component(r, &ctx->comps[0], use_hp); - if (mv_joint_horizontal(j)) + if (mv_joint_horizontal(joint_type)) diff.col = read_mv_component(r, &ctx->comps[1], use_hp); vp9_inc_mv(&diff, counts); @@ -276,7 +277,8 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, FRAME_COUNTS *const counts = &cm->counts; if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { - ref_frame[0] = vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME); + ref_frame[0] = (MV_REFERENCE_FRAME)vp9_get_segdata(&cm->seg, segment_id, + SEG_LVL_REF_FRAME); ref_frame[1] = NONE; } else { const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r); @@ -513,7 +515,7 @@ static void read_inter_frame_mode_info(VP9_COMMON *const cm, MACROBLOCKD *const xd, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader *r) { - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; int inter_block; diff --git a/libvpx/vp9/decoder/vp9_decoder.c b/libvpx/vp9/decoder/vp9_decoder.c index ebb329c..fd74478 100644 --- a/libvpx/vp9/decoder/vp9_decoder.c +++ b/libvpx/vp9/decoder/vp9_decoder.c @@ -104,23 +104,14 @@ void vp9_initialize_dec() { static int init_done = 0; if (!init_done) { - vp9_initialize_common(); + vp9_init_neighbors(); vp9_init_quant_tables(); init_done = 1; } } -static void init_macroblockd(VP9D_COMP *const pbi) { - MACROBLOCKD *xd = &pbi->mb; - struct macroblockd_plane *const pd = xd->plane; - int i; - - for (i = 0; i < MAX_MB_PLANE; ++i) - pd[i].dqcoeff = pbi->dqcoeff[i]; -} - -VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) { - VP9D_COMP *const pbi = vpx_memalign(32, sizeof(VP9D_COMP)); +VP9Decoder *vp9_decoder_create(const VP9D_CONFIG *oxcf) { + VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi)); VP9_COMMON *const cm = pbi ? &pbi->common : NULL; if (!cm) @@ -128,12 +119,9 @@ VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) { vp9_zero(*pbi); - // Initialize the references to not point to any frame buffers. - memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); - if (setjmp(cm->error.jmp)) { cm->error.setjmp = 0; - vp9_remove_decompressor(pbi); + vp9_decoder_remove(pbi); return NULL; } @@ -142,9 +130,13 @@ VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) { vp9_rtcd(); + // Initialize the references to not point to any frame buffers. + vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + + cm->current_video_frame = 0; pbi->oxcf = *oxcf; pbi->ready_for_new_data = 1; - cm->current_video_frame = 0; + pbi->decoded_key_frame = 0; // vp9_init_dequantizer() is first called here. Add check in // frame_init_dequantizer() to avoid unnecessary calling of @@ -154,22 +146,17 @@ VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) { vp9_loop_filter_init(cm); cm->error.setjmp = 0; - pbi->decoded_key_frame = 0; - - init_macroblockd(pbi); vp9_worker_init(&pbi->lf_worker); return pbi; } -void vp9_remove_decompressor(VP9D_COMP *pbi) { +void vp9_decoder_remove(VP9Decoder *pbi) { + VP9_COMMON *const cm = &pbi->common; int i; - if (!pbi) - return; - - vp9_remove_common(&pbi->common); + vp9_remove_common(cm); vp9_worker_end(&pbi->lf_worker); vpx_free(pbi->lf_worker.data1); for (i = 0; i < pbi->num_tile_workers; ++i) { @@ -181,16 +168,11 @@ void vp9_remove_decompressor(VP9D_COMP *pbi) { vpx_free(pbi->tile_workers); if (pbi->num_tile_workers) { - VP9_COMMON *const cm = &pbi->common; const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; - VP9LfSync *const lf_sync = &pbi->lf_row_sync; - - vp9_loop_filter_dealloc(lf_sync, sb_rows); + vp9_loop_filter_dealloc(&pbi->lf_row_sync, sb_rows); } - vpx_free(pbi->above_context[0]); - vpx_free(pbi->above_seg_context); vpx_free(pbi); } @@ -200,7 +182,7 @@ static int equal_dimensions(const YV12_BUFFER_CONFIG *a, a->uv_height == b->uv_height && a->uv_width == b->uv_width; } -vpx_codec_err_t vp9_copy_reference_dec(VP9D_COMP *pbi, +vpx_codec_err_t vp9_copy_reference_dec(VP9Decoder *pbi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { VP9_COMMON *cm = &pbi->common; @@ -227,17 +209,15 @@ vpx_codec_err_t vp9_copy_reference_dec(VP9D_COMP *pbi, } -vpx_codec_err_t vp9_set_reference_dec(VP9D_COMP *pbi, +vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { - VP9_COMMON *cm = &pbi->common; RefBuffer *ref_buf = NULL; - /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the - * encoder is using the frame buffers for. This is just a stub to keep the - * vpxenc --test-decode functionality working, and will be replaced in a - * later commit that adds VP9-specific controls for this functionality. - */ + // TODO(jkoleszar): The decoder doesn't have any real knowledge of what the + // encoder is using the frame buffers for. This is just a stub to keep the + // vpxenc --test-decode functionality working, and will be replaced in a + // later commit that adds VP9-specific controls for this functionality. if (ref_frame_flag == VP9_LAST_FLAG) { ref_buf = &cm->frame_refs[0]; } else if (ref_frame_flag == VP9_GOLD_FLAG) { @@ -245,13 +225,13 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_COMP *pbi, } else if (ref_frame_flag == VP9_ALT_FLAG) { ref_buf = &cm->frame_refs[2]; } else { - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Invalid reference frame"); - return pbi->common.error.error_code; + return cm->error.error_code; } if (!equal_dimensions(ref_buf->buf, sd)) { - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Incorrect buffer dimensions"); } else { int *ref_fb_ptr = &ref_buf->idx; @@ -268,11 +248,11 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_COMP *pbi, vp8_yv12_copy_frame(sd, ref_buf->buf); } - return pbi->common.error.error_code; + return cm->error.error_code; } -int vp9_get_reference_dec(VP9D_COMP *pbi, int index, YV12_BUFFER_CONFIG **fb) { +int vp9_get_reference_dec(VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb) { VP9_COMMON *cm = &pbi->common; if (index < 0 || index >= REF_FRAMES) @@ -283,7 +263,7 @@ int vp9_get_reference_dec(VP9D_COMP *pbi, int index, YV12_BUFFER_CONFIG **fb) { } /* If any buffer updating is signaled it should be done here. */ -static void swap_frame_buffers(VP9D_COMP *pbi) { +static void swap_frame_buffers(VP9Decoder *pbi) { int ref_index = 0, mask; VP9_COMMON *const cm = &pbi->common; @@ -307,35 +287,24 @@ static void swap_frame_buffers(VP9D_COMP *pbi) { cm->frame_refs[ref_index].idx = INT_MAX; } -int vp9_receive_compressed_data(VP9D_COMP *pbi, +int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size, const uint8_t **psource, int64_t time_stamp) { - VP9_COMMON *cm = NULL; + VP9_COMMON *const cm = &pbi->common; const uint8_t *source = *psource; int retcode = 0; - /*if(pbi->ready_for_new_data == 0) - return -1;*/ - - if (!pbi) - return -1; - - cm = &pbi->common; cm->error.error_code = VPX_CODEC_OK; - pbi->source = source; - pbi->source_sz = size; - - if (pbi->source_sz == 0) { - /* This is used to signal that we are missing frames. - * We do not know if the missing frame(s) was supposed to update - * any of the reference buffers, but we act conservative and - * mark only the last buffer as corrupted. - * - * TODO(jkoleszar): Error concealment is undefined and non-normative - * at this point, but if it becomes so, [0] may not always be the correct - * thing to do here. - */ + if (size == 0) { + // This is used to signal that we are missing frames. + // We do not know if the missing frame(s) was supposed to update + // any of the reference buffers, but we act conservative and + // mark only the last buffer as corrupted. + // + // TODO(jkoleszar): Error concealment is undefined and non-normative + // at this point, but if it becomes so, [0] may not always be the correct + // thing to do here. if (cm->frame_refs[0].idx != INT_MAX) cm->frame_refs[0].buf->corrupted = 1; } @@ -349,14 +318,13 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi, if (setjmp(cm->error.jmp)) { cm->error.setjmp = 0; - /* We do not know if the missing frame(s) was supposed to update - * any of the reference buffers, but we act conservative and - * mark only the last buffer as corrupted. - * - * TODO(jkoleszar): Error concealment is undefined and non-normative - * at this point, but if it becomes so, [0] may not always be the correct - * thing to do here. - */ + // We do not know if the missing frame(s) was supposed to update + // any of the reference buffers, but we act conservative and + // mark only the last buffer as corrupted. + // + // TODO(jkoleszar): Error concealment is undefined and non-normative + // at this point, but if it becomes so, [0] may not always be the correct + // thing to do here. if (cm->frame_refs[0].idx != INT_MAX) cm->frame_refs[0].buf->corrupted = 1; @@ -368,7 +336,7 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi, cm->error.setjmp = 1; - retcode = vp9_decode_frame(pbi, psource); + retcode = vp9_decode_frame(pbi, source, source + size, psource); if (retcode < 0) { cm->error.error_code = VPX_CODEC_ERROR; @@ -430,13 +398,12 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi, pbi->ready_for_new_data = 0; pbi->last_time_stamp = time_stamp; - pbi->source_sz = 0; cm->error.setjmp = 0; return retcode; } -int vp9_get_raw_frame(VP9D_COMP *pbi, YV12_BUFFER_CONFIG *sd, +int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp9_ppflags_t *flags) { int ret = -1; @@ -455,19 +422,12 @@ int vp9_get_raw_frame(VP9D_COMP *pbi, YV12_BUFFER_CONFIG *sd, #if CONFIG_VP9_POSTPROC ret = vp9_post_proc_frame(&pbi->common, sd, flags); #else - - if (pbi->common.frame_to_show) { *sd = *pbi->common.frame_to_show; sd->y_width = pbi->common.width; sd->y_height = pbi->common.height; sd->uv_width = sd->y_width >> pbi->common.subsampling_x; sd->uv_height = sd->y_height >> pbi->common.subsampling_y; - ret = 0; - } else { - ret = -1; - } - #endif /*!CONFIG_POSTPROC*/ vp9_clear_system_state(); return ret; diff --git a/libvpx/vp9/decoder/vp9_decoder.h b/libvpx/vp9/decoder/vp9_decoder.h index 4cbff45..c9dc251 100644 --- a/libvpx/vp9/decoder/vp9_decoder.h +++ b/libvpx/vp9/decoder/vp9_decoder.h @@ -31,30 +31,17 @@ typedef struct { int width; int height; int version; - int postprocess; int max_threads; int inv_tile_order; - int input_partition; } VP9D_CONFIG; -typedef enum { - VP9_LAST_FLAG = 1, - VP9_GOLD_FLAG = 2, - VP9_ALT_FLAG = 4 -} VP9_REFFRAME; - -typedef struct VP9Decompressor { +typedef struct VP9Decoder { DECLARE_ALIGNED(16, MACROBLOCKD, mb); DECLARE_ALIGNED(16, VP9_COMMON, common); - DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); - VP9D_CONFIG oxcf; - const uint8_t *source; - size_t source_sz; - int64_t last_time_stamp; int ready_for_new_data; @@ -72,37 +59,34 @@ typedef struct VP9Decompressor { int num_tile_workers; VP9LfSync lf_row_sync; - - ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; - PARTITION_CONTEXT *above_seg_context; -} VP9D_COMP; +} VP9Decoder; void vp9_initialize_dec(); -int vp9_receive_compressed_data(struct VP9Decompressor *pbi, +int vp9_receive_compressed_data(struct VP9Decoder *pbi, size_t size, const uint8_t **dest, int64_t time_stamp); -int vp9_get_raw_frame(struct VP9Decompressor *pbi, +int vp9_get_raw_frame(struct VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp9_ppflags_t *flags); -vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decompressor *pbi, +vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decoder *pbi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); -vpx_codec_err_t vp9_set_reference_dec(struct VP9Decompressor *pbi, +vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); -int vp9_get_reference_dec(struct VP9Decompressor *pbi, +int vp9_get_reference_dec(struct VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb); -struct VP9Decompressor *vp9_create_decompressor(VP9D_CONFIG *oxcf); +struct VP9Decoder *vp9_decoder_create(const VP9D_CONFIG *oxcf); -void vp9_remove_decompressor(struct VP9Decompressor *pbi); +void vp9_decoder_remove(struct VP9Decoder *pbi); #ifdef __cplusplus } // extern "C" diff --git a/libvpx/vp9/decoder/vp9_detokenize.c b/libvpx/vp9/decoder/vp9_detokenize.c index 52e78cd..860da53 100644 --- a/libvpx/vp9/decoder/vp9_detokenize.c +++ b/libvpx/vp9/decoder/vp9_detokenize.c @@ -86,7 +86,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type, const int max_eob = 16 << (tx_size << 1); const FRAME_CONTEXT *const fc = &cm->fc; FRAME_COUNTS *const counts = &cm->counts; - const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); + const int ref = is_inter_block(&xd->mi[0]->mbmi); int band, c = 0; const vp9_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = fc->coef_probs[tx_size][type][ref]; diff --git a/libvpx/vp9/decoder/vp9_dthread.c b/libvpx/vp9/decoder/vp9_dthread.c index 4df8509..9b124c9 100644 --- a/libvpx/vp9/decoder/vp9_dthread.c +++ b/libvpx/vp9/decoder/vp9_dthread.c @@ -99,7 +99,7 @@ static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer, for (r = start; r < stop; r += num_lf_workers) { const int mi_row = r << MI_BLOCK_SIZE_LOG2; - MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mode_info_stride; + MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mi_stride; for (c = 0; c < sb_cols; ++c) { const int mi_col = c << MI_BLOCK_SIZE_LOG2; @@ -108,8 +108,7 @@ static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer, sync_read(lf_sync, r, c); vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col); - vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mode_info_stride, - &lfm); + vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mi_stride, &lfm); for (plane = 0; plane < num_planes; ++plane) { vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm); @@ -133,13 +132,15 @@ static int loop_filter_row_worker(void *arg1, void *arg2) { // VP9 decoder: Implement multi-threaded loopfilter that uses the tile // threads. -void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, +void vp9_loop_filter_frame_mt(VP9Decoder *pbi, VP9_COMMON *cm, MACROBLOCKD *xd, int frame_filter_level, int y_only, int partial_frame) { // Number of superblock rows and cols const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; + const int tile_cols = 1 << cm->log2_tile_cols; + const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols); int i; // Allocate memory used in thread synchronization. @@ -169,7 +170,16 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows); // Set up loopfilter thread data. - for (i = 0; i < pbi->num_tile_workers; ++i) { + // The decoder is using num_workers instead of pbi->num_tile_workers + // because it has been observed that using more threads on the + // loopfilter, than there are tile columns in the frame will hurt + // performance on Android. This is because the system will only + // schedule the tile decode workers on cores equal to the number + // of tile columns. Then if the decoder tries to use more threads for the + // loopfilter, it will hurt performance because of contention. If the + // multithreading code changes in the future then the number of workers + // used by the loopfilter should be revisited. + for (i = 0; i < num_workers; ++i) { VP9Worker *const worker = &pbi->tile_workers[i]; TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; LFWorkerData *const lf_data = &tile_data->lfdata; @@ -185,10 +195,10 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, lf_data->y_only = y_only; // always do all planes in decoder lf_data->lf_sync = &pbi->lf_row_sync; - lf_data->num_lf_workers = pbi->num_tile_workers; + lf_data->num_lf_workers = num_workers; // Start loopfiltering - if (i == pbi->num_tile_workers - 1) { + if (i == num_workers - 1) { vp9_worker_execute(worker); } else { vp9_worker_launch(worker); @@ -196,7 +206,7 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, } // Wait till all rows are finished - for (i = 0; i < pbi->num_tile_workers; ++i) { + for (i = 0; i < num_workers; ++i) { vp9_worker_sync(&pbi->tile_workers[i]); } } diff --git a/libvpx/vp9/decoder/vp9_dthread.h b/libvpx/vp9/decoder/vp9_dthread.h index 6d4450f..005bd7b 100644 --- a/libvpx/vp9/decoder/vp9_dthread.h +++ b/libvpx/vp9/decoder/vp9_dthread.h @@ -18,13 +18,12 @@ struct macroblockd; struct VP9Common; -struct VP9Decompressor; +struct VP9Decoder; typedef struct TileWorkerData { struct VP9Common *cm; vp9_reader bit_reader; DECLARE_ALIGNED(16, struct macroblockd, xd); - DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); // Row-based parallel loopfilter data LFWorkerData lfdata; @@ -51,7 +50,7 @@ void vp9_loop_filter_alloc(struct VP9Common *cm, struct VP9LfSyncData *lf_sync, void vp9_loop_filter_dealloc(struct VP9LfSyncData *lf_sync, int rows); // Multi-threaded loopfilter that uses the tile threads. -void vp9_loop_filter_frame_mt(struct VP9Decompressor *pbi, +void vp9_loop_filter_frame_mt(struct VP9Decoder *pbi, struct VP9Common *cm, struct macroblockd *xd, int frame_filter_level, |