diff options
Diffstat (limited to 'libvpx/vp9/encoder')
46 files changed, 3852 insertions, 2950 deletions
diff --git a/libvpx/vp9/encoder/vp9_aq_complexity.c b/libvpx/vp9/encoder/vp9_aq_complexity.c new file mode 100644 index 0000000..47ad8d8 --- /dev/null +++ b/libvpx/vp9/encoder/vp9_aq_complexity.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <limits.h> +#include <math.h> + +#include "vp9/common/vp9_seg_common.h" + +#include "vp9/encoder/vp9_segmentation.h" + +static const double in_frame_q_adj_ratio[MAX_SEGMENTS] = + {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + +void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + struct segmentation *const seg = &cm->seg; + + // Make SURE use of floating point in this function is safe. + vp9_clear_system_state(); + + if (cm->frame_type == KEY_FRAME || + cpi->refresh_alt_ref_frame || + (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { + int segment; + + // Clear down the segment map. + vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); + + // Clear down the complexity map used for rd. + vpx_memset(cpi->complexity_map, 0, cm->mi_rows * cm->mi_cols); + + vp9_enable_segmentation(seg); + vp9_clearall_segfeatures(seg); + + // Select delta coding method. + seg->abs_delta = SEGMENT_DELTADATA; + + // Segment 0 "Q" feature is disabled so it defaults to the baseline Q. + vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q); + + // Use some of the segments for in frame Q adjustment. 
+ for (segment = 1; segment < 2; segment++) { + const int qindex_delta = + vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex, + in_frame_q_adj_ratio[segment]); + vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); + vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); + } + } +} + +// Select a segment for the current SB64 +void vp9_select_in_frame_q_segment(VP9_COMP *cpi, + int mi_row, int mi_col, + int output_enabled, int projected_rate) { + VP9_COMMON *const cm = &cpi->common; + + const int mi_offset = mi_row * cm->mi_cols + mi_col; + const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; + const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; + const int xmis = MIN(cm->mi_cols - mi_col, bw); + const int ymis = MIN(cm->mi_rows - mi_row, bh); + int complexity_metric = 64; + int x, y; + + unsigned char segment; + + if (!output_enabled) { + segment = 0; + } else { + // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh). + // It is converted to bits * 256 units. + const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) / + (bw * bh); + + if (projected_rate < (target_rate / 4)) { + segment = 1; + } else { + segment = 0; + } + + if (target_rate > 0) { + complexity_metric = + clamp((int)((projected_rate * 64) / target_rate), 16, 255); + } + } + + // Fill in the entires in the segment map corresponding to this SB64. + for (y = 0; y < ymis; y++) { + for (x = 0; x < xmis; x++) { + cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment; + cpi->complexity_map[mi_offset + y * cm->mi_cols + x] = + (unsigned char)complexity_metric; + } + } +} diff --git a/libvpx/vp9/encoder/vp9_aq_complexity.h b/libvpx/vp9/encoder/vp9_aq_complexity.h new file mode 100644 index 0000000..af031a4 --- /dev/null +++ b/libvpx/vp9/encoder/vp9_aq_complexity.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ +#define VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9_COMP; + +// Select a segment for the current SB64. +void vp9_select_in_frame_q_segment(struct VP9_COMP *cpi, int mi_row, int mi_col, + int output_enabled, int projected_rate); + + +// This function sets up a set of segments with delta Q values around +// the baseline frame quantizer. +void vp9_setup_in_frame_q_adj(struct VP9_COMP *cpi); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ diff --git a/libvpx/vp9/encoder/vp9_craq.c b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c index 40437c7..7879091 100644 --- a/libvpx/vp9/encoder/vp9_craq.c +++ b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -11,7 +11,7 @@ #include <limits.h> #include <math.h> -#include "vp9/encoder/vp9_craq.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" #include "vp9/common/vp9_seg_common.h" @@ -19,19 +19,69 @@ #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_segmentation.h" +struct CYCLIC_REFRESH { + // Percentage of super-blocks per frame that are targeted as candidates + // for cyclic refresh. + int max_sbs_perframe; + // Maximum q-delta as percentage of base q. + int max_qdelta_perc; + // Block size below which we don't apply cyclic refresh. + BLOCK_SIZE min_block_size; + // Superblock starting index for cycling through the frame. + int sb_index; + // Controls how long a block will need to wait to be refreshed again. + int time_for_refresh; + // Actual number of (8x8) blocks that were applied delta-q (segment 1). 
+ int num_seg_blocks; + // Actual encoding bits for segment 1. + int actual_seg_bits; + // RD mult. parameters for segment 1. + int rdmult; + // Cyclic refresh map. + signed char *map; + // Projected rate and distortion for the current superblock. + int64_t projected_rate_sb; + int64_t projected_dist_sb; + // Thresholds applied to projected rate/distortion of the superblock. + int64_t thresh_rate_sb; + int64_t thresh_dist_sb; +}; + +CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { + CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr)); + if (cr == NULL) + return NULL; + + cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map)); + if (cr->map == NULL) { + vpx_free(cr); + return NULL; + } + + return cr; +} + +void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) { + vpx_free(cr->map); + vpx_free(cr); +} // Check if we should turn off cyclic refresh based on bitrate condition. -static int apply_cyclic_refresh_bitrate(VP9_COMP *const cpi) { +static int apply_cyclic_refresh_bitrate(const VP9_COMMON *cm, + const RATE_CONTROL *rc) { // Turn off cyclic refresh if bits available per frame is not sufficiently // larger than bit cost of segmentation. Segment map bit cost should scale // with number of seg blocks, so compare available bits to number of blocks. // Average bits available per frame = av_per_frame_bandwidth // Number of (8x8) blocks in frame = mi_rows * mi_cols; - float factor = 0.5; - int number_blocks = cpi->common.mi_rows * cpi->common.mi_cols; + const float factor = 0.5; + const int number_blocks = cm->mi_rows * cm->mi_cols; // The condition below corresponds to turning off at target bitrates: // ~24kbps for CIF, 72kbps for VGA (at 30fps). - if (cpi->rc.av_per_frame_bandwidth < factor * number_blocks) + // Also turn off at very small frame sizes, to avoid too large fraction of + // superblocks to be refreshed per frame. Threshold below is less than QCIF. 
+ if (rc->av_per_frame_bandwidth < factor * number_blocks || + number_blocks / 64 < 5) return 0; else return 1; @@ -41,11 +91,9 @@ static int apply_cyclic_refresh_bitrate(VP9_COMP *const cpi) { // (lower-qp coding). Decision can be based on various factors, such as // size of the coding block (i.e., below min_block size rejected), coding // mode, and rate/distortion. -static int candidate_refresh_aq(VP9_COMP *const cpi, - MODE_INFO *const mi, - int bsize, - int use_rd) { - CYCLIC_REFRESH *const cr = &cpi->cyclic_refresh; +static int candidate_refresh_aq(const CYCLIC_REFRESH *cr, + const MB_MODE_INFO *mbmi, + BLOCK_SIZE bsize, int use_rd) { if (use_rd) { // If projected rate is below the thresh_rate (well below target, // so undershoot expected), accept it for lower-qp coding. @@ -56,18 +104,18 @@ static int candidate_refresh_aq(VP9_COMP *const cpi, // 2) mode is non-zero mv and projected distortion is above thresh_dist // 3) mode is an intra-mode (we may want to allow some of this under // another thresh_dist) - else if ((bsize < cr->min_block_size) || - (mi->mbmi.mv[0].as_int != 0 && - cr->projected_dist_sb > cr->thresh_dist_sb) || - !is_inter_block(&mi->mbmi)) + else if (bsize < cr->min_block_size || + (mbmi->mv[0].as_int != 0 && + cr->projected_dist_sb > cr->thresh_dist_sb) || + !is_inter_block(mbmi)) return 0; else return 1; } else { // Rate/distortion not used for update. - if ((bsize < cr->min_block_size) || - (mi->mbmi.mv[0].as_int != 0) || - !is_inter_block(&mi->mbmi)) + if (bsize < cr->min_block_size || + mbmi->mv[0].as_int != 0 || + !is_inter_block(mbmi)) return 0; else return 1; @@ -77,33 +125,31 @@ static int candidate_refresh_aq(VP9_COMP *const cpi, // Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), // check if we should reset the segment_id, and update the cyclic_refresh map // and segmentation map. 
-void vp9_update_segment_aq(VP9_COMP *const cpi, - MODE_INFO *const mi, - int mi_row, - int mi_col, - int bsize, - int use_rd) { - CYCLIC_REFRESH *const cr = &cpi->cyclic_refresh; - VP9_COMMON *const cm = &cpi->common; +void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, + MB_MODE_INFO *const mbmi, + int mi_row, int mi_col, + BLOCK_SIZE bsize, int use_rd) { + const VP9_COMMON *const cm = &cpi->common; + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; const int xmis = MIN(cm->mi_cols - mi_col, bw); const int ymis = MIN(cm->mi_rows - mi_row, bh); const int block_index = mi_row * cm->mi_cols + mi_col; + const int refresh_this_block = candidate_refresh_aq(cr, mbmi, bsize, use_rd); // Default is to not update the refresh map. int new_map_value = cr->map[block_index]; int x = 0; int y = 0; - int current_segment = mi->mbmi.segment_id; - int refresh_this_block = candidate_refresh_aq(cpi, mi, bsize, use_rd); + // Check if we should reset the segment_id for this block. - if (current_segment && !refresh_this_block) - mi->mbmi.segment_id = 0; + if (mbmi->segment_id > 0 && !refresh_this_block) + mbmi->segment_id = 0; // Update the cyclic refresh map, to be used for setting segmentation map // for the next frame. If the block will be refreshed this frame, mark it // as clean. The magnitude of the -ve influences how long before we consider // it for refresh again. 
- if (mi->mbmi.segment_id == 1) { + if (mbmi->segment_id == 1) { new_map_value = -cr->time_for_refresh; } else if (refresh_this_block) { // Else if it is accepted as candidate for refresh, and has not already @@ -121,54 +167,54 @@ void vp9_update_segment_aq(VP9_COMP *const cpi, for (x = 0; x < xmis; x++) { cr->map[block_index + y * cm->mi_cols + x] = new_map_value; cpi->segmentation_map[block_index + y * cm->mi_cols + x] = - mi->mbmi.segment_id; + mbmi->segment_id; } // Keep track of actual number (in units of 8x8) of blocks in segment 1 used // for encoding this frame. - if (mi->mbmi.segment_id) + if (mbmi->segment_id) cr->num_seg_blocks += xmis * ymis; } // Setup cyclic background refresh: set delta q and segmentation map. -void vp9_setup_cyclic_refresh_aq(VP9_COMP *const cpi) { +void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; - CYCLIC_REFRESH *const cr = &cpi->cyclic_refresh; + const RATE_CONTROL *const rc = &cpi->rc; + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; struct segmentation *const seg = &cm->seg; - unsigned char *seg_map = cpi->segmentation_map; - int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cpi); + unsigned char *const seg_map = cpi->segmentation_map; + const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc); // Don't apply refresh on key frame or enhancement layer frames. if (!apply_cyclic_refresh || - (cpi->common.frame_type == KEY_FRAME) || + (cm->frame_type == KEY_FRAME) || (cpi->svc.temporal_layer_id > 0)) { // Set segmentation map to 0 and disable. 
vpx_memset(seg_map, 0, cm->mi_rows * cm->mi_cols); vp9_disable_segmentation(&cm->seg); - if (cpi->common.frame_type == KEY_FRAME) - cr->mb_index = 0; + if (cm->frame_type == KEY_FRAME) + cr->sb_index = 0; return; } else { int qindex_delta = 0; - int mbs_in_frame = cm->mi_rows * cm->mi_cols; - int i, x, y, block_count, bl_index, bl_index2; - int sum_map, new_value, mi_row, mi_col, xmis, ymis, qindex2; + int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame; + int xmis, ymis, x, y, qindex2; // Rate target ratio to set q delta. - float rate_ratio_qdelta = 2.0; + const float rate_ratio_qdelta = 2.0; vp9_clear_system_state(); // Some of these parameters may be set via codec-control function later. - cr->max_mbs_perframe = 10; + cr->max_sbs_perframe = 10; cr->max_qdelta_perc = 50; - cr->min_block_size = BLOCK_16X16; + cr->min_block_size = BLOCK_8X8; cr->time_for_refresh = 1; // Set rate threshold to some fraction of target (and scaled by 256). - cr->thresh_rate_sb = (cpi->rc.sb64_target_rate * 256) >> 2; + cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 2; // Distortion threshold, quadratic in Q, scale factor to be adjusted. cr->thresh_dist_sb = 8 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) * vp9_convert_qindex_to_q(cm->base_qindex)); if (cpi->sf.use_nonrd_pick_mode) { // May want to be more conservative with thresholds in non-rd mode for now // as rate/distortion are derived from model based on prediction residual. - cr->thresh_rate_sb = (cpi->rc.sb64_target_rate * 256) >> 3; + cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 3; cr->thresh_dist_sb = 4 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) * vp9_convert_qindex_to_q(cm->base_qindex)); } @@ -195,73 +241,84 @@ void vp9_setup_cyclic_refresh_aq(VP9_COMP *const cpi) { vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); // Set the q delta for segment 1. 
- qindex_delta = vp9_compute_qdelta_by_rate(cpi, + qindex_delta = vp9_compute_qdelta_by_rate(rc, cm->frame_type, cm->base_qindex, rate_ratio_qdelta); // TODO(marpan): Incorporate the actual-vs-target rate over/undershoot from // previous encoded frame. - if ((-qindex_delta) > cr->max_qdelta_perc * cm->base_qindex / 100) { + if (-qindex_delta > cr->max_qdelta_perc * cm->base_qindex / 100) qindex_delta = -cr->max_qdelta_perc * cm->base_qindex / 100; - } // Compute rd-mult for segment 1. qindex2 = clamp(cm->base_qindex + cm->y_dc_delta_q + qindex_delta, 0, MAXQ); cr->rdmult = vp9_compute_rd_mult(cpi, qindex2); vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qindex_delta); - // Number of target macroblocks to get the q delta (segment 1). - block_count = cr->max_mbs_perframe * mbs_in_frame / 100; - // Set the segmentation map: cycle through the macroblocks, starting at + + sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; + sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; + sbs_in_frame = sb_cols * sb_rows; + // Number of target superblocks to get the q delta (segment 1). + block_count = cr->max_sbs_perframe * sbs_in_frame / 100; + // Set the segmentation map: cycle through the superblocks, starting at // cr->mb_index, and stopping when either block_count blocks have been found // to be refreshed, or we have passed through whole frame. - // Note the setting of seg_map below is done in two steps (one over 8x8) - // and then another over SB, in order to keep the value constant over SB. - // TODO(marpan): Do this in one pass in SB order. - assert(cr->mb_index < mbs_in_frame); - i = cr->mb_index; + assert(cr->sb_index < sbs_in_frame); + i = cr->sb_index; do { - // If the macroblock is as a candidate for clean up then mark it - // for possible boost/refresh (segment 1). The segment id may get reset to - // 0 later if the macroblock gets coded anything other than ZEROMV. 
- if (cr->map[i] == 0) { - seg_map[i] = 1; - block_count--; - } else if (cr->map[i] < 0) { - cr->map[i]++; + int sum_map = 0; + // Get the mi_row/mi_col corresponding to superblock index i. + int sb_row_index = (i / sb_cols); + int sb_col_index = i - sb_row_index * sb_cols; + int mi_row = sb_row_index * MI_BLOCK_SIZE; + int mi_col = sb_col_index * MI_BLOCK_SIZE; + assert(mi_row >= 0 && mi_row < cm->mi_rows); + assert(mi_col >= 0 && mi_col < cm->mi_cols); + bl_index = mi_row * cm->mi_cols + mi_col; + // Loop through all 8x8 blocks in superblock and update map. + xmis = MIN(cm->mi_cols - mi_col, + num_8x8_blocks_wide_lookup[BLOCK_64X64]); + ymis = MIN(cm->mi_rows - mi_row, + num_8x8_blocks_high_lookup[BLOCK_64X64]); + for (y = 0; y < ymis; y++) { + for (x = 0; x < xmis; x++) { + const int bl_index2 = bl_index + y * cm->mi_cols + x; + // If the block is as a candidate for clean up then mark it + // for possible boost/refresh (segment 1). The segment id may get + // reset to 0 later if block gets coded anything other than ZEROMV. + if (cr->map[bl_index2] == 0) { + seg_map[bl_index2] = 1; + sum_map++; + } else if (cr->map[bl_index2] < 0) { + cr->map[bl_index2]++; + } + } + } + // Enforce constant segment over superblock. + // If segment is partial over superblock, reset to either all 1 or 0. + if (sum_map > 0 && sum_map < xmis * ymis) { + const int new_value = (sum_map >= xmis * ymis / 2); + for (y = 0; y < ymis; y++) + for (x = 0; x < xmis; x++) + seg_map[bl_index + y * cm->mi_cols + x] = new_value; } i++; - if (i == mbs_in_frame) { + if (i == sbs_in_frame) { i = 0; } - } while (block_count && i != cr->mb_index); - cr->mb_index = i; - // Enforce constant segment map over superblock. 
- for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { - bl_index = mi_row * cm->mi_cols + mi_col; - xmis = num_8x8_blocks_wide_lookup[BLOCK_64X64]; - ymis = num_8x8_blocks_high_lookup[BLOCK_64X64]; - xmis = MIN(cm->mi_cols - mi_col, xmis); - ymis = MIN(cm->mi_rows - mi_row, ymis); - sum_map = 0; - for (y = 0; y < ymis; y++) - for (x = 0; x < xmis; x++) { - bl_index2 = bl_index + y * cm->mi_cols + x; - sum_map += seg_map[bl_index2]; - } - new_value = 0; - // If segment is partial over superblock, reset. - if (sum_map > 0 && sum_map < xmis * ymis) { - if (sum_map < xmis * ymis / 2) - new_value = 0; - else - new_value = 1; - for (y = 0; y < ymis; y++) - for (x = 0; x < xmis; x++) { - bl_index2 = bl_index + y * cm->mi_cols + x; - seg_map[bl_index2] = new_value; - } - } - } + if (sum_map >= xmis * ymis /2) + block_count--; + } while (block_count && i != cr->sb_index); + cr->sb_index = i; } } + +void vp9_cyclic_refresh_set_rate_and_dist_sb(CYCLIC_REFRESH *cr, + int64_t rate_sb, int64_t dist_sb) { + cr->projected_rate_sb = rate_sb; + cr->projected_dist_sb = dist_sb; +} + +int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr) { + return cr->rdmult; +} diff --git a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h new file mode 100644 index 0000000..f556d65 --- /dev/null +++ b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#ifndef VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ +#define VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ + +#include "vp9/common/vp9_blockd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9_COMP; + +struct CYCLIC_REFRESH; +typedef struct CYCLIC_REFRESH CYCLIC_REFRESH; + +CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols); + +void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr); + +// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), +// check if we should reset the segment_id, and update the cyclic_refresh map +// and segmentation map. +void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi, + MB_MODE_INFO *const mbmi, + int mi_row, int mi_col, + BLOCK_SIZE bsize, int use_rd); + +// Setup cyclic background refresh: set delta q and segmentation map. +void vp9_cyclic_refresh_setup(struct VP9_COMP *const cpi); + +void vp9_cyclic_refresh_set_rate_and_dist_sb(CYCLIC_REFRESH *cr, + int64_t rate_sb, int64_t dist_sb); + +int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ diff --git a/libvpx/vp9/encoder/vp9_vaq.c b/libvpx/vp9/encoder/vp9_aq_variance.c index c71c171..ae2a163 100644 --- a/libvpx/vp9/encoder/vp9_vaq.c +++ b/libvpx/vp9/encoder/vp9_aq_variance.c @@ -10,7 +10,7 @@ #include <math.h> -#include "vp9/encoder/vp9_vaq.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/common/vp9_seg_common.h" @@ -99,7 +99,7 @@ void vp9_vaq_frame_setup(VP9_COMP *cpi) { continue; } - qindex_delta = vp9_compute_qdelta(cpi, base_q, base_q * Q_RATIO(i)); + qindex_delta = vp9_compute_qdelta(&cpi->rc, base_q, base_q * Q_RATIO(i)); vp9_set_segdata(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q, qindex_delta); vp9_enable_segfeature(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q); diff --git a/libvpx/vp9/encoder/vp9_vaq.h b/libvpx/vp9/encoder/vp9_aq_variance.h index c73114a..381fe50 100644 --- a/libvpx/vp9/encoder/vp9_vaq.h +++ 
b/libvpx/vp9/encoder/vp9_aq_variance.h @@ -9,8 +9,8 @@ */ -#ifndef VP9_ENCODER_VP9_VAQ_H_ -#define VP9_ENCODER_VP9_VAQ_H_ +#ifndef VP9_ENCODER_VP9_AQ_VARIANCE_H_ +#define VP9_ENCODER_VP9_AQ_VARIANCE_H_ #include "vp9/encoder/vp9_onyx_int.h" @@ -31,4 +31,4 @@ int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs); } // extern "C" #endif -#endif // VP9_ENCODER_VP9_VAQ_H_ +#endif // VP9_ENCODER_VP9_AQ_VARIANCE_H_ diff --git a/libvpx/vp9/encoder/vp9_bitstream.c b/libvpx/vp9/encoder/vp9_bitstream.c index 1b4a6cc..8d2afb9 100644 --- a/libvpx/vp9/encoder/vp9_bitstream.c +++ b/libvpx/vp9/encoder/vp9_bitstream.c @@ -192,7 +192,7 @@ static void write_segment_id(vp9_writer *w, const struct segmentation *seg, static void write_ref_frames(const VP9_COMP *cpi, vp9_writer *w) { const VP9_COMMON *const cm = &cpi->common; const MACROBLOCKD *const xd = &cpi->mb.e_mbd; - const MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int is_compound = has_second_ref(mbmi); const int segment_id = mbmi->segment_id; @@ -336,7 +336,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO **mi_8x8, const MACROBLOCKD *const xd = &cpi->mb.e_mbd; const struct segmentation *const seg = &cm->seg; const MODE_INFO *const mi = mi_8x8[0]; - const MODE_INFO *const above_mi = mi_8x8[-xd->mode_info_stride]; + const MODE_INFO *const above_mi = mi_8x8[-xd->mi_stride]; const MODE_INFO *const left_mi = xd->left_available ? 
mi_8x8[-1] : NULL; const MB_MODE_INFO *const mbmi = &mi->mbmi; const BLOCK_SIZE bsize = mbmi->sb_type; @@ -375,15 +375,15 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCKD *const xd = &cpi->mb.e_mbd; MODE_INFO *m; - xd->mi_8x8 = cm->mi_grid_visible + (mi_row * cm->mode_info_stride + mi_col); - m = xd->mi_8x8[0]; + xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); + m = xd->mi[0]; set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[m->mbmi.sb_type], mi_col, num_8x8_blocks_wide_lookup[m->mbmi.sb_type], cm->mi_rows, cm->mi_cols); if (frame_is_intra_only(cm)) { - write_mb_modes_kf(cpi, xd->mi_8x8, w); + write_mb_modes_kf(cpi, xd->mi, w); } else { pack_inter_mode_mvs(cpi, m, w); } @@ -392,12 +392,10 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile, pack_mb_tokens(w, tok, tok_end); } -static void write_partition(VP9_COMP *cpi, int hbs, int mi_row, int mi_col, +static void write_partition(VP9_COMMON *cm, MACROBLOCKD *xd, + int hbs, int mi_row, int mi_col, PARTITION_TYPE p, BLOCK_SIZE bsize, vp9_writer *w) { - VP9_COMMON *const cm = &cpi->common; - const int ctx = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); const vp9_prob *const probs = get_partition_probs(cm, ctx); const int has_rows = (mi_row + hbs) < cm->mi_rows; const int has_cols = (mi_col + hbs) < cm->mi_cols; @@ -415,21 +413,24 @@ static void write_partition(VP9_COMP *cpi, int hbs, int mi_row, int mi_col, } } -static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile, +static void write_modes_sb(VP9_COMP *cpi, + const TileInfo *const tile, vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end, int mi_row, int mi_col, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + const int bsl = b_width_log2(bsize); const int bs = (1 << bsl) / 4; PARTITION_TYPE 
partition; BLOCK_SIZE subsize; - MODE_INFO *m = cm->mi_grid_visible[mi_row * cm->mode_info_stride + mi_col]; + MODE_INFO *m = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; partition = partition_lookup[bsl][m->mbmi.sb_type]; - write_partition(cpi, bs, mi_row, mi_col, partition, bsize, w); + write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w); subsize = get_subsize(bsize, partition); if (subsize < BLOCK_8X8) { write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); @@ -465,29 +466,30 @@ static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile, // update partition context if (bsize >= BLOCK_8X8 && (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) - update_partition_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } -static void write_modes(VP9_COMP *cpi, const TileInfo *const tile, +static void write_modes(VP9_COMP *cpi, + const TileInfo *const tile, vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end) { int mi_row, mi_col; for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; mi_row += MI_BLOCK_SIZE) { - vp9_zero(cpi->left_seg_context); + vp9_zero(cpi->mb.e_mbd.left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, BLOCK_64X64); + write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, + BLOCK_64X64); } } -static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size) { +static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size, + vp9_coeff_stats *coef_branch_ct) { vp9_coeff_probs_model *coef_probs = cpi->frame_coef_probs[tx_size]; vp9_coeff_count *coef_counts = cpi->coef_counts[tx_size]; unsigned int (*eob_branch_ct)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] = cpi->common.counts.eob_branch[tx_size]; - vp9_coeff_stats *coef_branch_ct = 
cpi->frame_branch_ct[tx_size]; int i, j, k, l, m; for (i = 0; i < PLANE_TYPES; ++i) { @@ -510,16 +512,16 @@ static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size) { } static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi, - TX_SIZE tx_size) { + TX_SIZE tx_size, + vp9_coeff_stats *frame_branch_ct) { vp9_coeff_probs_model *new_frame_coef_probs = cpi->frame_coef_probs[tx_size]; vp9_coeff_probs_model *old_frame_coef_probs = cpi->common.fc.coef_probs[tx_size]; - vp9_coeff_stats *frame_branch_ct = cpi->frame_branch_ct[tx_size]; const vp9_prob upd = DIFF_UPDATE_PROB; const int entropy_nodes_update = UNCONSTRAINED_NODES; int i, j, k, l, t; switch (cpi->sf.use_fast_coef_updates) { - case 0: { + case TWO_LOOP: { /* dry run to see if there is any udpate at all needed */ int savings = 0; int update[2] = {0, 0}; @@ -594,14 +596,14 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi, return; } - case 1: - case 2: { + case ONE_LOOP: + case ONE_LOOP_REDUCED: { const int prev_coef_contexts_to_update = - cpi->sf.use_fast_coef_updates == 2 ? COEFF_CONTEXTS >> 1 - : COEFF_CONTEXTS; + cpi->sf.use_fast_coef_updates == ONE_LOOP_REDUCED ? + COEFF_CONTEXTS >> 1 : COEFF_CONTEXTS; const int coef_band_to_update = - cpi->sf.use_fast_coef_updates == 2 ? COEF_BANDS >> 1 - : COEF_BANDS; + cpi->sf.use_fast_coef_updates == ONE_LOOP_REDUCED ? 
+ COEF_BANDS >> 1 : COEF_BANDS; int updates = 0; int noupdates_before_first = 0; for (i = 0; i < PLANE_TYPES; ++i) { @@ -667,13 +669,15 @@ static void update_coef_probs(VP9_COMP *cpi, vp9_writer* w) { const TX_MODE tx_mode = cpi->common.tx_mode; const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; TX_SIZE tx_size; + vp9_coeff_stats frame_branch_ct[TX_SIZES][PLANE_TYPES]; + vp9_clear_system_state(); for (tx_size = TX_4X4; tx_size <= TX_32X32; ++tx_size) - build_tree_distribution(cpi, tx_size); + build_tree_distribution(cpi, tx_size, frame_branch_ct[tx_size]); for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) - update_coef_probs_common(w, cpi, tx_size); + update_coef_probs_common(w, cpi, tx_size, frame_branch_ct[tx_size]); } static void encode_loopfilter(struct loopfilter *lf, @@ -930,7 +934,7 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; - vpx_memset(cpi->above_seg_context, 0, sizeof(*cpi->above_seg_context) * + vpx_memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * mi_cols_aligned_to_sb(cm->mi_cols)); tok[0][0] = cpi->tok; @@ -1027,19 +1031,22 @@ static void write_sync_code(struct vp9_write_bit_buffer *wb) { vp9_wb_write_literal(wb, VP9_SYNC_CODE_2, 8); } +static void write_profile(BITSTREAM_PROFILE profile, + struct vp9_write_bit_buffer *wb) { + assert(profile < MAX_PROFILES); + vp9_wb_write_bit(wb, profile & 1); + vp9_wb_write_bit(wb, profile >> 1); +} + static void write_uncompressed_header(VP9_COMP *cpi, struct vp9_write_bit_buffer *wb) { VP9_COMMON *const cm = &cpi->common; vp9_wb_write_literal(wb, VP9_FRAME_MARKER, 2); - // bitstream version. - // 00 - profile 0. 4:2:0 only - // 10 - profile 1. 
adds 4:4:4, 4:2:2, alpha - vp9_wb_write_bit(wb, cm->version); - vp9_wb_write_bit(wb, 0); + write_profile(cm->profile, wb); - vp9_wb_write_bit(wb, 0); + vp9_wb_write_bit(wb, 0); // show_existing_frame vp9_wb_write_bit(wb, cm->frame_type); vp9_wb_write_bit(wb, cm->show_frame); vp9_wb_write_bit(wb, cm->error_resilient_mode); @@ -1047,16 +1054,20 @@ static void write_uncompressed_header(VP9_COMP *cpi, if (cm->frame_type == KEY_FRAME) { const COLOR_SPACE cs = UNKNOWN; write_sync_code(wb); + if (cm->profile > PROFILE_1) { + assert(cm->bit_depth > BITS_8); + vp9_wb_write_bit(wb, cm->bit_depth - BITS_10); + } vp9_wb_write_literal(wb, cs, 3); if (cs != SRGB) { vp9_wb_write_bit(wb, 0); // 0: [16, 235] (i.e. xvYCC), 1: [0, 255] - if (cm->version == 1) { + if (cm->profile >= PROFILE_1) { vp9_wb_write_bit(wb, cm->subsampling_x); vp9_wb_write_bit(wb, cm->subsampling_y); vp9_wb_write_bit(wb, 0); // has extra plane } } else { - assert(cm->version == 1); + assert(cm->profile == PROFILE_1); vp9_wb_write_bit(wb, 0); // has extra plane } @@ -1184,7 +1195,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) { uint8_t *data = dest; - size_t first_part_size; + size_t first_part_size, uncompressed_hdr_size; struct vp9_write_bit_buffer wb = {data, 0}; struct vp9_write_bit_buffer saved_wb; @@ -1192,7 +1203,8 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) { saved_wb = wb; vp9_wb_write_literal(&wb, 0, 16); // don't know in advance first part. 
size - data += vp9_rb_bytes_written(&wb); + uncompressed_hdr_size = vp9_rb_bytes_written(&wb); + data += uncompressed_hdr_size; vp9_compute_update_table(); diff --git a/libvpx/vp9/encoder/vp9_block.h b/libvpx/vp9/encoder/vp9_block.h index 888984c..7729d84 100644 --- a/libvpx/vp9/encoder/vp9_block.h +++ b/libvpx/vp9/encoder/vp9_block.h @@ -157,7 +157,6 @@ struct macroblock { // note that token_costs is the cost when eob node is skipped vp9_coeff_cost token_costs[TX_SIZES]; - DECLARE_ALIGNED(16, uint8_t, token_cache[1024]); int optimize; @@ -197,7 +196,8 @@ struct macroblock { // TODO(jingning): the variables used here are little complicated. need further // refactoring on organizing the temporary buffers, when recursive // partition down to 4x4 block size is enabled. -static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE bsize) { +static INLINE PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, + BLOCK_SIZE bsize) { switch (bsize) { case BLOCK_64X64: return &x->sb64_context; diff --git a/libvpx/vp9/encoder/vp9_craq.h b/libvpx/vp9/encoder/vp9_craq.h deleted file mode 100644 index 1f81f3e..0000000 --- a/libvpx/vp9/encoder/vp9_craq.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_ENCODER_VP9_CRAQ_H_ -#define VP9_ENCODER_VP9_CRAQ_H_ - -#include "vp9/encoder/vp9_onyx_int.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Check if we should turn off cyclic refresh based on bitrate condition. 
-static int apply_cyclic_refresh_bitrate(VP9_COMP *const cpi); - -// Check if this coding block, of size bsize, should be considered for refresh -// (lower-qp coding). -static int candidate_refresh_aq(VP9_COMP *const cpi, - MODE_INFO *const mi, - int bsize, - int use_rd); - -// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), -// check if we should reset the segment_id, and update the cyclic_refresh map -// and segmentation map. -void vp9_update_segment_aq(VP9_COMP *const cpi, - MODE_INFO *const mi, - int mi_row, - int mi_col, - int bsize, - int use_rd); - -// Setup cyclic background refresh: set delta q and segmentation map. -void vp9_setup_cyclic_refresh_aq(VP9_COMP *const cpi); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_VP9_CRAQ_H_ diff --git a/libvpx/vp9/encoder/vp9_encodeframe.c b/libvpx/vp9/encoder/vp9_encodeframe.c index 2f6c33d..61a5022 100644 --- a/libvpx/vp9/encoder/vp9_encodeframe.c +++ b/libvpx/vp9/encoder/vp9_encodeframe.c @@ -30,6 +30,9 @@ #include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_tile_common.h" +#include "vp9/encoder/vp9_aq_complexity.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemv.h" @@ -38,8 +41,6 @@ #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_segmentation.h" #include "vp9/encoder/vp9_tokenize.h" -#include "vp9/encoder/vp9_vaq.h" -#include "vp9/encoder/vp9_craq.h" #define GF_ZEROMV_ZBIN_BOOST 0 #define LF_ZEROMV_ZBIN_BOOST 0 @@ -162,15 +163,14 @@ static INLINE void set_modeinfo_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col) { - const int idx_str = xd->mode_info_stride * mi_row + mi_col; - xd->mi_8x8 = cm->mi_grid_visible + idx_str; - xd->prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; - xd->mi_8x8[0] = cm->mi + idx_str; + const int idx_str = xd->mi_stride * 
mi_row + mi_col; + xd->mi = cm->mi_grid_visible + idx_str; + xd->mi[0] = cm->mi + idx_str; } -static int is_block_in_mb_map(VP9_COMP *cpi, int mi_row, int mi_col, +static int is_block_in_mb_map(const VP9_COMP *cpi, int mi_row, int mi_col, BLOCK_SIZE bsize) { - VP9_COMMON *const cm = &cpi->common; + const VP9_COMMON *const cm = &cpi->common; const int mb_rows = cm->mb_rows; const int mb_cols = cm->mb_cols; const int mb_row = mi_row >> 1; @@ -194,6 +194,16 @@ static int is_block_in_mb_map(VP9_COMP *cpi, int mi_row, int mi_col, return 0; } +static int check_active_map(const VP9_COMP *cpi, const MACROBLOCK *x, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + if (cpi->active_map_enabled && !x->e_mbd.lossless) { + return is_block_in_mb_map(cpi, mi_row, mi_col, bsize); + } else { + return 1; + } +} + static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col, BLOCK_SIZE bsize) { MACROBLOCK *const x = &cpi->mb; @@ -207,20 +217,15 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, const int idx_map = mb_row * cm->mb_cols + mb_col; const struct segmentation *const seg = &cm->seg; - set_skip_context(xd, cpi->above_context, cpi->left_context, mi_row, mi_col); + set_skip_context(xd, mi_row, mi_col); // Activity map pointer x->mb_activity_ptr = &cpi->mb_activity_map[idx_map]; - - if (cpi->active_map_enabled && !x->e_mbd.lossless) { - x->in_active_map = is_block_in_mb_map(cpi, mi_row, mi_col, bsize); - } else { - x->in_active_map = 1; - } + x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); set_modeinfo_offsets(cm, xd, mi_row, mi_col); - mbmi = &xd->mi_8x8[0]->mbmi; + mbmi = &xd->mi[0]->mbmi; // Set up destination pointers. 
vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col); @@ -253,22 +258,6 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, } vp9_init_plane_quantizers(cpi, x); - if (seg->enabled && cpi->seg0_cnt > 0 && - !vp9_segfeature_active(seg, 0, SEG_LVL_REF_FRAME) && - vp9_segfeature_active(seg, 1, SEG_LVL_REF_FRAME)) { - cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt; - } else { - const int y = mb_row & ~3; - const int x = mb_col & ~3; - const int p16 = ((mb_row & 1) << 1) + (mb_col & 1); - const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1); - const int tile_progress = tile->mi_col_start * cm->mb_rows >> 1; - const int mb_cols = (tile->mi_col_end - tile->mi_col_start) >> 1; - - cpi->seg0_progress = ((y * mb_cols + x * 4 + p32 + p16 + tile_progress) - << 16) / cm->MBs; - } - x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id]; } else { mbmi->segment_id = 0; @@ -276,19 +265,18 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, } } -static void duplicate_modeinfo_in_sb(VP9_COMMON * const cm, +static void duplicate_mode_info_in_sb(VP9_COMMON * const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, BLOCK_SIZE bsize) { const int block_width = num_8x8_blocks_wide_lookup[bsize]; const int block_height = num_8x8_blocks_high_lookup[bsize]; - const int mis = xd->mode_info_stride; int i, j; for (j = 0; j < block_height; ++j) for (i = 0; i < block_width; ++i) { if (mi_row + j < cm->mi_rows && mi_col + i < cm->mi_cols) - xd->mi_8x8[j * mis + i] = xd->mi_8x8[0]; + xd->mi[j * xd->mi_stride + i] = xd->mi[0]; } } @@ -299,8 +287,8 @@ static void set_block_size(VP9_COMP * const cpi, if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) { MACROBLOCKD *const xd = &cpi->mb.e_mbd; set_modeinfo_offsets(&cpi->common, xd, mi_row, mi_col); - xd->mi_8x8[0]->mbmi.sb_type = bsize; - duplicate_modeinfo_in_sb(&cpi->common, xd, mi_row, mi_col, bsize); + xd->mi[0]->mbmi.sb_type = bsize; + 
duplicate_mode_info_in_sb(&cpi->common, xd, mi_row, mi_col, bsize); } } @@ -496,13 +484,13 @@ static void choose_partitioning(VP9_COMP *cpi, if (cm->frame_type != KEY_FRAME) { vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, sf); - xd->mi_8x8[0]->mbmi.ref_frame[0] = LAST_FRAME; - xd->mi_8x8[0]->mbmi.sb_type = BLOCK_64X64; + xd->mi[0]->mbmi.ref_frame[0] = LAST_FRAME; + xd->mi[0]->mbmi.sb_type = BLOCK_64X64; vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, - xd->mi_8x8[0]->mbmi.ref_mvs[LAST_FRAME], + xd->mi[0]->mbmi.ref_mvs[LAST_FRAME], &nearest_mv, &near_mv); - xd->mi_8x8[0]->mbmi.mv[0] = nearest_mv; + xd->mi[0]->mbmi.mv[0] = nearest_mv; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_64X64); d = xd->plane[0].dst.buf; @@ -829,52 +817,6 @@ static void activity_masking(VP9_COMP *cpi, MACROBLOCK *x) { adjust_act_zbin(cpi, x); } -// Select a segment for the current SB64 -static void select_in_frame_q_segment(VP9_COMP *cpi, - int mi_row, int mi_col, - int output_enabled, int projected_rate) { - VP9_COMMON *const cm = &cpi->common; - - const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; - const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; - const int xmis = MIN(cm->mi_cols - mi_col, bw); - const int ymis = MIN(cm->mi_rows - mi_row, bh); - int complexity_metric = 64; - int x, y; - - unsigned char segment; - - if (!output_enabled) { - segment = 0; - } else { - // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh). 
- // It is converted to bits * 256 units - const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) / - (bw * bh); - - if (projected_rate < (target_rate / 4)) { - segment = 1; - } else { - segment = 0; - } - - if (target_rate > 0) { - complexity_metric = - clamp((int)((projected_rate * 64) / target_rate), 16, 255); - } - } - - // Fill in the entires in the segment map corresponding to this SB64 - for (y = 0; y < ymis; y++) { - for (x = 0; x < xmis; x++) { - cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment; - cpi->complexity_map[mi_offset + y * cm->mi_cols + x] = - (unsigned char)complexity_metric; - } - } -} - static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, BLOCK_SIZE bsize, int output_enabled) { @@ -885,32 +827,37 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; MODE_INFO *mi = &ctx->mic; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; - MODE_INFO *mi_addr = xd->mi_8x8[0]; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *mi_addr = xd->mi[0]; const struct segmentation *const seg = &cm->seg; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; int max_plane; assert(mi->mbmi.sb_type == bsize); - // For in frame adaptive Q copy over the chosen segment id into the - // mode innfo context for the chosen mode / partition. - if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ || - cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) && - output_enabled) { - // Check for reseting segment_id and update cyclic map. 
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && seg->enabled) { - vp9_update_segment_aq(cpi, xd->mi_8x8[0], mi_row, mi_col, bsize, 1); + *mi_addr = *mi; + + // If segmentation in use + if (seg->enabled && output_enabled) { + // For in frame complexity AQ copy the segment id from the segment map. + if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { + const uint8_t *const map = seg->update_map ? cpi->segmentation_map + : cm->last_frame_seg_map; + mi_addr->mbmi.segment_id = + vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); + } + // Else for cyclic refresh mode update the segment map, set the segment id + // and then update the quantizer. + else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, + mi_row, mi_col, bsize, 1); vp9_init_plane_quantizers(cpi, x); } - mi->mbmi.segment_id = xd->mi_8x8[0]->mbmi.segment_id; } - *mi_addr = *mi; - max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1; for (i = 0; i < max_plane; ++i) { p[i].coeff = ctx->coeff_pbuf[i][1]; @@ -932,7 +879,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, for (x_idx = 0; x_idx < mi_width; x_idx++) if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) { - xd->mi_8x8[x_idx + y * mis] = mi_addr; + xd->mi[x_idx + y * mis] = mi_addr; } if (cpi->oxcf.aq_mode) @@ -1051,7 +998,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, } set_offsets(cpi, tile, mi_row, mi_col, bsize); - mbmi = &xd->mi_8x8[0]->mbmi; + mbmi = &xd->mi[0]->mbmi; mbmi->sb_type = bsize; for (i = 0; i < MAX_MB_PLANE; ++i) { @@ -1101,12 +1048,12 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, (mi_col <= 1) || (mi_col >= (cm->mi_cols - 2)); if (!is_edge && (complexity > 128)) x->rdmult += ((x->rdmult * (complexity - 128)) / 256); - } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + } else if (aq_mode == CYCLIC_REFRESH_AQ) { const uint8_t *const 
map = cm->seg.update_map ? cpi->segmentation_map - : cm->last_frame_seg_map; + : cm->last_frame_seg_map; // If segment 1, use rdmult for that segment. if (vp9_get_segment_id(cm, map, bsize, mi_row, mi_col)) - x->rdmult = cpi->cyclic_refresh.rdmult; + x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); } // Find best coding mode & reconstruct the MB so it is available @@ -1129,8 +1076,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, vp9_clear_system_state(); *totalrate = (int)round(*totalrate * rdmult_ratio); } - } else if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) || - (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)) { + } else if (aq_mode == COMPLEXITY_AQ || aq_mode == CYCLIC_REFRESH_AQ) { x->rdmult = orig_rdmult; } } @@ -1139,7 +1085,7 @@ static void update_stats(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const MACROBLOCK *const x = &cpi->mb; const MACROBLOCKD *const xd = &x->e_mbd; - const MODE_INFO *const mi = xd->mi_8x8[0]; + const MODE_INFO *const mi = xd->mi[0]; const MB_MODE_INFO *const mbmi = &mi->mbmi; if (!frame_is_intra_only(cm)) { @@ -1206,21 +1152,21 @@ static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col, int mi_height = num_8x8_blocks_high_lookup[bsize]; for (p = 0; p < MAX_MB_PLANE; p++) { vpx_memcpy( - cpi->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x), + xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x), a + num_4x4_blocks_wide * p, (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> xd->plane[p].subsampling_x); vpx_memcpy( - cpi->left_context[p] + xd->left_context[p] + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), l + num_4x4_blocks_high * p, (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> xd->plane[p].subsampling_y); } - vpx_memcpy(cpi->above_seg_context + mi_col, sa, - sizeof(*cpi->above_seg_context) * mi_width); - vpx_memcpy(cpi->left_seg_context + (mi_row & MI_MASK), sl, - sizeof(cpi->left_seg_context[0]) * mi_height); + 
vpx_memcpy(xd->above_seg_context + mi_col, sa, + sizeof(*xd->above_seg_context) * mi_width); + vpx_memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl, + sizeof(xd->left_seg_context[0]) * mi_height); } static void save_context(VP9_COMP *cpi, int mi_row, int mi_col, ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], @@ -1239,20 +1185,20 @@ static void save_context(VP9_COMP *cpi, int mi_row, int mi_col, for (p = 0; p < MAX_MB_PLANE; ++p) { vpx_memcpy( a + num_4x4_blocks_wide * p, - cpi->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x), + xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x), (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> xd->plane[p].subsampling_x); vpx_memcpy( l + num_4x4_blocks_high * p, - cpi->left_context[p] + xd->left_context[p] + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> xd->plane[p].subsampling_y); } - vpx_memcpy(sa, cpi->above_seg_context + mi_col, - sizeof(*cpi->above_seg_context) * mi_width); - vpx_memcpy(sl, cpi->left_seg_context + (mi_row & MI_MASK), - sizeof(cpi->left_seg_context[0]) * mi_height); + vpx_memcpy(sa, xd->above_seg_context + mi_col, + sizeof(*xd->above_seg_context) * mi_width); + vpx_memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK), + sizeof(xd->left_seg_context[0]) * mi_height); } static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, @@ -1284,6 +1230,8 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, int output_enabled, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; int ctx; PARTITION_TYPE partition; @@ -1293,8 +1241,7 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, return; if (bsize >= BLOCK_8X8) { - ctx = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + ctx = partition_plane_context(xd, mi_row, mi_col, 
bsize); subsize = *get_sb_partitioning(x, bsize); } else { ctx = 0; @@ -1349,8 +1296,7 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, } if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) - update_partition_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } // Check to see if the given partition size is allowed for a specified number @@ -1382,7 +1328,7 @@ static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile, MODE_INFO **mi_8x8, int mi_row, int mi_col, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; int row8x8_remaining = tile->mi_row_end - mi_row; int col8x8_remaining = tile->mi_col_end - mi_col; int block_row, block_col; @@ -1418,15 +1364,79 @@ static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile, } } +static void constrain_copy_partitioning(VP9_COMP *const cpi, + const TileInfo *const tile, + MODE_INFO **mi_8x8, + MODE_INFO **prev_mi_8x8, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + VP9_COMMON *const cm = &cpi->common; + const int mis = cm->mi_stride; + const int row8x8_remaining = tile->mi_row_end - mi_row; + const int col8x8_remaining = tile->mi_col_end - mi_col; + MODE_INFO *const mi_upper_left = cm->mi + mi_row * mis + mi_col; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int bw = num_8x8_blocks_wide_lookup[bsize]; + int block_row, block_col; + + assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); + + // If the SB64 if it is all "in image". 
+ if ((col8x8_remaining >= MI_BLOCK_SIZE) && + (row8x8_remaining >= MI_BLOCK_SIZE)) { + for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) { + for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) { + const int index = block_row * mis + block_col; + MODE_INFO *prev_mi = prev_mi_8x8[index]; + const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0; + // Use previous partition if block size is not larger than bsize. + if (prev_mi && sb_type <= bsize) { + int block_row2, block_col2; + for (block_row2 = 0; block_row2 < bh; ++block_row2) { + for (block_col2 = 0; block_col2 < bw; ++block_col2) { + const int index2 = (block_row + block_row2) * mis + + block_col + block_col2; + prev_mi = prev_mi_8x8[index2]; + if (prev_mi) { + const ptrdiff_t offset = prev_mi - cm->prev_mi; + mi_8x8[index2] = cm->mi + offset; + mi_8x8[index2]->mbmi.sb_type = prev_mi->mbmi.sb_type; + } + } + } + } else { + // Otherwise, use fixed partition of size bsize. + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = bsize; + } + } + } + } else { + // Else this is a partial SB64, copy previous partition. + for (block_row = 0; block_row < 8; ++block_row) { + for (block_col = 0; block_col < 8; ++block_col) { + MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col]; + const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0; + if (prev_mi) { + const ptrdiff_t offset = prev_mi - cm->prev_mi; + mi_8x8[block_row * mis + block_col] = cm->mi + offset; + mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type; + } + } + } + } +} + static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8, MODE_INFO **prev_mi_8x8) { - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; int block_row, block_col; for (block_row = 0; block_row < 8; ++block_row) { for (block_col = 0; block_col < 8; ++block_col) { MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col]; const BLOCK_SIZE sb_type = prev_mi ? 
prev_mi->mbmi.sb_type : 0; + if (prev_mi) { const ptrdiff_t offset = prev_mi - cm->prev_mi; mi_8x8[block_row * mis + block_col] = cm->mi + offset; @@ -1436,8 +1446,127 @@ static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8, } } +const struct { + int row; + int col; +} coord_lookup[16] = { + // 32x32 index = 0 + {0, 0}, {0, 2}, {2, 0}, {2, 2}, + // 32x32 index = 1 + {0, 4}, {0, 6}, {2, 4}, {2, 6}, + // 32x32 index = 2 + {4, 0}, {4, 2}, {6, 0}, {6, 2}, + // 32x32 index = 3 + {4, 4}, {4, 6}, {6, 4}, {6, 6}, +}; + +static void set_source_var_based_partition(VP9_COMP *cpi, + const TileInfo *const tile, + MODE_INFO **mi_8x8, + int mi_row, int mi_col) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *x = &cpi->mb; + const int mis = cm->mi_stride; + int row8x8_remaining = tile->mi_row_end - mi_row; + int col8x8_remaining = tile->mi_col_end - mi_col; + int r, c; + MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col; + + assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); + + // In-image SB64 + if ((col8x8_remaining >= MI_BLOCK_SIZE) && + (row8x8_remaining >= MI_BLOCK_SIZE)) { + const int src_stride = x->plane[0].src.stride; + const int pre_stride = cpi->Last_Source->y_stride; + const uint8_t *src = x->plane[0].src.buf; + const int pre_offset = (mi_row * MI_SIZE) * pre_stride + + (mi_col * MI_SIZE); + const uint8_t *pre_src = cpi->Last_Source->y_buffer + pre_offset; + const int thr_32x32 = cpi->sf.source_var_thresh; + const int thr_64x64 = thr_32x32 << 1; + int i, j; + int index; + diff d32[4]; + int use16x16 = 0; + + for (i = 0; i < 4; i++) { + diff d16[4]; + + for (j = 0; j < 4; j++) { + int b_mi_row = coord_lookup[i * 4 + j].row; + int b_mi_col = coord_lookup[i * 4 + j].col; + int b_offset = b_mi_row * MI_SIZE * src_stride + + b_mi_col * MI_SIZE; + + vp9_get_sse_sum_16x16(src + b_offset, + src_stride, + pre_src + b_offset, + pre_stride, &d16[j].sse, &d16[j].sum); + + d16[j].var = d16[j].sse - + (((uint32_t)d16[j].sum * d16[j].sum) >> 8); + + 
index = b_mi_row * mis + b_mi_col; + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = BLOCK_16X16; + + // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition + // size to further improve quality. + } + + if (d16[0].var < thr_32x32 && d16[1].var < thr_32x32 && + d16[2].var < thr_32x32 && d16[3].var < thr_32x32) { + d32[i].sse = d16[0].sse; + d32[i].sum = d16[0].sum; + + for (j = 1; j < 4; j++) { + d32[i].sse += d16[j].sse; + d32[i].sum += d16[j].sum; + } + + d32[i].var = d32[i].sse - (((int64_t)d32[i].sum * d32[i].sum) >> 10); + + index = coord_lookup[i*4].row * mis + coord_lookup[i*4].col; + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = BLOCK_32X32; + + if (!((cm->current_video_frame - 1) % + cpi->sf.search_type_check_frequency)) + cpi->use_large_partition_rate += 1; + } else { + use16x16 = 1; + } + } + + if (!use16x16) { + if (d32[0].var < thr_64x64 && d32[1].var < thr_64x64 && + d32[2].var < thr_64x64 && d32[3].var < thr_64x64) { + mi_8x8[0] = mi_upper_left; + mi_8x8[0]->mbmi.sb_type = BLOCK_64X64; + } + } + } else { // partial in-image SB64 + BLOCK_SIZE bsize = BLOCK_16X16; + int bh = num_8x8_blocks_high_lookup[bsize]; + int bw = num_8x8_blocks_wide_lookup[bsize]; + + for (r = 0; r < MI_BLOCK_SIZE; r += bh) { + for (c = 0; c < MI_BLOCK_SIZE; c += bw) { + int index = r * mis + c; + // Find a partition size that fits + bsize = find_partition_size(bsize, + (row8x8_remaining - r), + (col8x8_remaining - c), &bh, &bw); + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = bsize; + } + } + } +} + static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) { - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; int block_row, block_col; if (cm->prev_mi) { @@ -1455,22 +1584,21 @@ static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) { return 0; } -static void update_state_rt(VP9_COMP *cpi, const PICK_MODE_CONTEXT *ctx, +static void 
update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, int bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const struct segmentation *const seg = &cm->seg; - // TODO(jingning) We might need PICK_MODE_CONTEXT to buffer coding modes - // associated with variable block sizes. Otherwise, remove this ctx - // from argument list. - (void)ctx; + *(xd->mi[0]) = ctx->mic; - // Check for reseting segment_id and update cyclic map. - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && seg->enabled) { - vp9_update_segment_aq(cpi, xd->mi_8x8[0], mi_row, mi_col, bsize, 1); + // For in frame adaptive Q, check for reseting the segment_id and updating + // the cyclic refresh map. + if ((cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) && seg->enabled) { + vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, + mi_row, mi_col, bsize, 1); vp9_init_plane_quantizers(cpi, x); } @@ -1482,11 +1610,13 @@ static void update_state_rt(VP9_COMP *cpi, const PICK_MODE_CONTEXT *ctx, ++cm->counts.switchable_interp[pred_ctx][mbmi->interp_filter]; } } + + x->skip = ctx->skip; } static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile, - TOKENEXTRA **tp, int mi_row, int mi_col, - int output_enabled, BLOCK_SIZE bsize) { + TOKENEXTRA **tp, int mi_row, int mi_col, + int output_enabled, BLOCK_SIZE bsize) { MACROBLOCK *const x = &cpi->mb; if (bsize < BLOCK_8X8) { @@ -1495,6 +1625,7 @@ static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile, if (x->ab_index > 0) return; } + set_offsets(cpi, tile, mi_row, mi_col, bsize); update_state_rt(cpi, get_block_context(x, bsize), mi_row, mi_col, bsize); @@ -1510,6 +1641,8 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, int output_enabled, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = 
&x->e_mbd; + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; int ctx; PARTITION_TYPE partition; @@ -1520,10 +1653,9 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, if (bsize >= BLOCK_8X8) { MACROBLOCKD *const xd = &cpi->mb.e_mbd; - const int idx_str = xd->mode_info_stride * mi_row + mi_col; + const int idx_str = xd->mi_stride * mi_row + mi_col; MODE_INFO ** mi_8x8 = cm->mi_grid_visible + idx_str; - ctx = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + ctx = partition_plane_context(xd, mi_row, mi_col, bsize); subsize = mi_8x8[0]->mbmi.sb_type; } else { ctx = 0; @@ -1582,8 +1714,7 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, } if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) - update_partition_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } static void rd_use_partition(VP9_COMP *cpi, @@ -1594,12 +1725,10 @@ static void rd_use_partition(VP9_COMP *cpi, int do_recon) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; - const int mis = cm->mode_info_stride; + MACROBLOCKD *const xd = &x->e_mbd; + const int mis = cm->mi_stride; const int bsl = b_width_log2(bsize); - const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; - const int ms = num_4x4_blocks_wide / 2; - const int mh = num_4x4_blocks_high / 2; + const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2; const int bss = (1 << bsl) / 4; int i, pl; PARTITION_TYPE partition = PARTITION_NONE; @@ -1618,10 +1747,14 @@ static void rd_use_partition(VP9_COMP *cpi, BLOCK_SIZE sub_subsize = BLOCK_4X4; int splits_below = 0; BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type; + int do_partition_search = 1; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; + assert(num_4x4_blocks_wide_lookup[bsize] == + 
num_4x4_blocks_high_lookup[bsize]); + partition = partition_lookup[bsl][bs_type]; subsize = get_subsize(bsize, partition); @@ -1641,9 +1774,22 @@ static void rd_use_partition(VP9_COMP *cpi, if (bsize == BLOCK_16X16) { set_offsets(cpi, tile, mi_row, mi_col, bsize); x->mb_energy = vp9_block_energy(cpi, x, bsize); + } else { + x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); } - if (cpi->sf.partition_search_type == SEARCH_PARTITION && + if (!x->in_active_map) { + do_partition_search = 0; + if (mi_row + (mi_step >> 1) < cm->mi_rows && + mi_col + (mi_step >> 1) < cm->mi_cols) { + *(get_sb_partitioning(x, bsize)) = bsize; + bs_type = mi_8x8[0]->mbmi.sb_type = bsize; + subsize = bsize; + partition = PARTITION_NONE; + } + } + if (do_partition_search && + cpi->sf.partition_search_type == SEARCH_PARTITION && cpi->sf.adjust_partitioning_from_last_frame) { // Check if any of the sub blocks are further split. if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) { @@ -1661,15 +1807,13 @@ static void rd_use_partition(VP9_COMP *cpi, // If partition is not none try none unless each of the 4 splits are split // even further.. 
if (partition != PARTITION_NONE && !splits_below && - mi_row + (ms >> 1) < cm->mi_rows && - mi_col + (ms >> 1) < cm->mi_cols) { + mi_row + (mi_step >> 1) < cm->mi_rows && + mi_col + (mi_step >> 1) < cm->mi_cols) { *(get_sb_partitioning(x, bsize)) = bsize; rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, &none_dist, bsize, get_block_context(x, bsize), INT64_MAX); - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (none_rate < INT_MAX) { none_rate += x->partition_cost[pl][PARTITION_NONE]; @@ -1694,14 +1838,14 @@ static void rd_use_partition(VP9_COMP *cpi, &last_part_dist, subsize, get_block_context(x, subsize), INT64_MAX); if (last_part_rate != INT_MAX && - bsize >= BLOCK_8X8 && mi_row + (mh >> 1) < cm->mi_rows) { + bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) { int rt = 0; int64_t dt = 0; update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *get_sb_index(x, subsize) = 1; - rd_pick_sb_modes(cpi, tile, mi_row + (ms >> 1), mi_col, &rt, &dt, + rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &rt, &dt, subsize, get_block_context(x, subsize), INT64_MAX); if (rt == INT_MAX || dt == INT64_MAX) { last_part_rate = INT_MAX; @@ -1719,14 +1863,14 @@ static void rd_use_partition(VP9_COMP *cpi, &last_part_dist, subsize, get_block_context(x, subsize), INT64_MAX); if (last_part_rate != INT_MAX && - bsize >= BLOCK_8X8 && mi_col + (ms >> 1) < cm->mi_cols) { + bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) { int rt = 0; int64_t dt = 0; update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *get_sb_index(x, subsize) = 1; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (ms >> 1), &rt, &dt, + rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &rt, &dt, 
subsize, get_block_context(x, subsize), INT64_MAX); if (rt == INT_MAX || dt == INT64_MAX) { last_part_rate = INT_MAX; @@ -1742,8 +1886,8 @@ static void rd_use_partition(VP9_COMP *cpi, last_part_rate = 0; last_part_dist = 0; for (i = 0; i < 4; i++) { - int x_idx = (i & 1) * (ms >> 1); - int y_idx = (i >> 1) * (ms >> 1); + int x_idx = (i & 1) * (mi_step >> 1); + int y_idx = (i >> 1) * (mi_step >> 1); int jj = i >> 1, ii = i & 0x01; int rt; int64_t dt; @@ -1769,18 +1913,20 @@ static void rd_use_partition(VP9_COMP *cpi, assert(0); } - pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (last_part_rate < INT_MAX) { last_part_rate += x->partition_cost[pl][partition]; last_part_rd = RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist); } - if (cpi->sf.adjust_partitioning_from_last_frame + if (do_partition_search + && cpi->sf.adjust_partitioning_from_last_frame && cpi->sf.partition_search_type == SEARCH_PARTITION && partition != PARTITION_SPLIT && bsize > BLOCK_8X8 - && (mi_row + ms < cm->mi_rows || mi_row + (ms >> 1) == cm->mi_rows) - && (mi_col + ms < cm->mi_cols || mi_col + (ms >> 1) == cm->mi_cols)) { + && (mi_row + mi_step < cm->mi_rows || + mi_row + (mi_step >> 1) == cm->mi_rows) + && (mi_col + mi_step < cm->mi_cols || + mi_col + (mi_step >> 1) == cm->mi_cols)) { BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); chosen_rate = 0; chosen_dist = 0; @@ -1788,8 +1934,8 @@ static void rd_use_partition(VP9_COMP *cpi, // Split partition. 
for (i = 0; i < 4; i++) { - int x_idx = (i & 1) * (num_4x4_blocks_wide >> 2); - int y_idx = (i >> 1) * (num_4x4_blocks_wide >> 2); + int x_idx = (i & 1) * (mi_step >> 1); + int y_idx = (i >> 1) * (mi_step >> 1); int rt = 0; int64_t dt = 0; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; @@ -1823,14 +1969,11 @@ static void rd_use_partition(VP9_COMP *cpi, encode_sb(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, 0, split_subsize); - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row + y_idx, mi_col + x_idx, + pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx, split_subsize); chosen_rate += x->partition_cost[pl][PARTITION_NONE]; } - pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (chosen_rate < INT_MAX) { chosen_rate += x->partition_cost[pl][PARTITION_SPLIT]; chosen_rd = RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist); @@ -1868,14 +2011,14 @@ static void rd_use_partition(VP9_COMP *cpi, // and and if necessary apply a Q delta using segmentation to get // closer to the target. 
if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { - select_in_frame_q_segment(cpi, mi_row, mi_col, - output_enabled, chosen_rate); - } - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - cpi->cyclic_refresh.projected_rate_sb = chosen_rate; - cpi->cyclic_refresh.projected_dist_sb = chosen_dist; + vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, + output_enabled, chosen_rate); } + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + chosen_rate, chosen_dist); + encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize); } @@ -1923,7 +2066,7 @@ static void get_sb_partition_size_range(VP9_COMP *cpi, MODE_INFO ** mi_8x8, *min_block_size = MIN(*min_block_size, sb_type); *max_block_size = MAX(*max_block_size, sb_type); } - index += xd->mode_info_stride; + index += xd->mi_stride; } } @@ -1939,77 +2082,71 @@ static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { // Look at neighboring blocks and set a min and max partition size based on // what they chose. 
static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, - int row, int col, + int mi_row, int mi_col, BLOCK_SIZE *min_block_size, BLOCK_SIZE *max_block_size) { - VP9_COMMON * const cm = &cpi->common; + VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; - MODE_INFO ** mi_8x8 = xd->mi_8x8; - MODE_INFO ** prev_mi_8x8 = xd->prev_mi_8x8; - + MODE_INFO **mi_8x8 = xd->mi; const int left_in_image = xd->left_available && mi_8x8[-1]; const int above_in_image = xd->up_available && - mi_8x8[-xd->mode_info_stride]; - MODE_INFO ** above_sb64_mi_8x8; - MODE_INFO ** left_sb64_mi_8x8; + mi_8x8[-xd->mi_stride]; + MODE_INFO **above_sb64_mi_8x8; + MODE_INFO **left_sb64_mi_8x8; - int row8x8_remaining = tile->mi_row_end - row; - int col8x8_remaining = tile->mi_col_end - col; + int row8x8_remaining = tile->mi_row_end - mi_row; + int col8x8_remaining = tile->mi_col_end - mi_col; int bh, bw; - + BLOCK_SIZE min_size = BLOCK_4X4; + BLOCK_SIZE max_size = BLOCK_64X64; // Trap case where we do not have a prediction. - if (!left_in_image && !above_in_image && - ((cm->frame_type == KEY_FRAME) || !cm->prev_mi)) { - *min_block_size = BLOCK_4X4; - *max_block_size = BLOCK_64X64; - } else { + if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) { // Default "min to max" and "max to min" - *min_block_size = BLOCK_64X64; - *max_block_size = BLOCK_4X4; + min_size = BLOCK_64X64; + max_size = BLOCK_4X4; // NOTE: each call to get_sb_partition_size_range() uses the previous // passed in values for min and max as a starting point. 
- // // Find the min and max partition used in previous frame at this location - if (cm->prev_mi && (cm->frame_type != KEY_FRAME)) { - get_sb_partition_size_range(cpi, prev_mi_8x8, - min_block_size, max_block_size); + if (cm->frame_type != KEY_FRAME) { + MODE_INFO **const prev_mi = + &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]; + get_sb_partition_size_range(cpi, prev_mi, &min_size, &max_size); } - // Find the min and max partition sizes used in the left SB64 if (left_in_image) { left_sb64_mi_8x8 = &mi_8x8[-MI_BLOCK_SIZE]; get_sb_partition_size_range(cpi, left_sb64_mi_8x8, - min_block_size, max_block_size); + &min_size, &max_size); } - // Find the min and max partition sizes used in the above SB64. if (above_in_image) { - above_sb64_mi_8x8 = &mi_8x8[-xd->mode_info_stride * MI_BLOCK_SIZE]; + above_sb64_mi_8x8 = &mi_8x8[-xd->mi_stride * MI_BLOCK_SIZE]; get_sb_partition_size_range(cpi, above_sb64_mi_8x8, - min_block_size, max_block_size); + &min_size, &max_size); + } + // adjust observed min and max + if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { + min_size = min_partition_size[min_size]; + max_size = max_partition_size[max_size]; } } - // adjust observed min and max - if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { - *min_block_size = min_partition_size[*min_block_size]; - *max_block_size = max_partition_size[*max_block_size]; - } - - // Check border cases where max and min from neighbours may not be legal. - *max_block_size = find_partition_size(*max_block_size, - row8x8_remaining, col8x8_remaining, - &bh, &bw); - *min_block_size = MIN(*min_block_size, *max_block_size); + // Check border cases where max and min from neighbors may not be legal. 
+ max_size = find_partition_size(max_size, + row8x8_remaining, col8x8_remaining, + &bh, &bw); + min_size = MIN(min_size, max_size); // When use_square_partition_only is true, make sure at least one square // partition is allowed by selecting the next smaller square size as // *min_block_size. if (cpi->sf.use_square_partition_only && - next_square_size[*max_block_size] < *min_block_size) { - *min_block_size = next_square_size[*max_block_size]; + next_square_size[max_size] < min_size) { + min_size = next_square_size[max_size]; } + *min_block_size = min_size; + *max_block_size = max_size; } static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { @@ -2029,7 +2166,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, int64_t *dist, int do_recon, int64_t best_rd) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; - const int ms = num_8x8_blocks_wide_lookup[bsize] / 2; + MACROBLOCKD *const xd = &x->e_mbd; + const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; PARTITION_CONTEXT sl[8], sa[8]; TOKENEXTRA *tp_orig = *tp; @@ -2042,8 +2180,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, int do_split = bsize >= BLOCK_8X8; int do_rect = 1; // Override skipping rectangular partition operations for edge blocks - const int force_horz_split = (mi_row + ms >= cm->mi_rows); - const int force_vert_split = (mi_col + ms >= cm->mi_cols); + const int force_horz_split = (mi_row + mi_step >= cm->mi_rows); + const int force_vert_split = (mi_col + mi_step >= cm->mi_cols); const int xss = x->e_mbd.plane[1].subsampling_x; const int yss = x->e_mbd.plane[1].subsampling_y; @@ -2069,6 +2207,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (bsize == BLOCK_16X16) { set_offsets(cpi, tile, mi_row, mi_col, bsize); x->mb_energy = vp9_block_energy(cpi, x, bsize); + } else { + x->in_active_map = check_active_map(cpi, x, 
mi_row, mi_col, bsize); } // Determine partition types in search according to the speed features. @@ -2110,9 +2250,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, ctx, best_rd); if (this_rate != INT_MAX) { if (bsize >= BLOCK_8X8) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); this_rate += x->partition_cost[pl][PARTITION_NONE]; } sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist); @@ -2157,8 +2295,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (do_split) { subsize = get_subsize(bsize, PARTITION_SPLIT); for (i = 0; i < 4 && sum_rd < best_rd; ++i) { - const int x_idx = (i & 1) * ms; - const int y_idx = (i >> 1) * ms; + const int x_idx = (i & 1) * mi_step; + const int y_idx = (i >> 1) * mi_step; if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) continue; @@ -2182,9 +2320,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } if (sum_rd < best_rd && i == 4) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += x->partition_cost[pl][PARTITION_SPLIT]; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { @@ -2216,7 +2352,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, get_block_context(x, subsize), best_rd); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) { + if (sum_rd < best_rd && mi_row + mi_step < cm->mi_rows) { update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); @@ -2228,7 +2364,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, partition_none_allowed) get_block_context(x, 
subsize)->pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rate, + rd_pick_sb_modes(cpi, tile, mi_row + mi_step, mi_col, &this_rate, &this_dist, subsize, get_block_context(x, subsize), best_rd - sum_rd); if (this_rate == INT_MAX) { @@ -2240,9 +2376,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } if (sum_rd < best_rd) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += x->partition_cost[pl][PARTITION_HORZ]; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { @@ -2269,7 +2403,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, get_block_context(x, subsize), best_rd); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) { + if (sum_rd < best_rd && mi_col + mi_step < cm->mi_cols) { update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); @@ -2281,7 +2415,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, partition_none_allowed) get_block_context(x, subsize)->pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rate, + rd_pick_sb_modes(cpi, tile, mi_row, mi_col + mi_step, &this_rate, &this_dist, subsize, get_block_context(x, subsize), best_rd - sum_rd); if (this_rate == INT_MAX) { @@ -2293,9 +2427,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } if (sum_rd < best_rd) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += x->partition_cost[pl][PARTITION_VERT]; 
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { @@ -2323,13 +2455,14 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, // and and if necessary apply a Q delta using segmentation to get // closer to the target. if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { - select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, best_rate); - } - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - cpi->cyclic_refresh.projected_rate_sb = best_rate; - cpi->cyclic_refresh.projected_dist_sb = best_dist; + vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, + best_rate); } + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + best_rate, best_dist); + encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize); } if (bsize == BLOCK_64X64) { @@ -2344,11 +2477,13 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, TOKENEXTRA **tp) { VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + SPEED_FEATURES *const sf = &cpi->sf; int mi_col; // Initialize the left context for the new SB row - vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context)); - vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context)); + vpx_memset(&xd->left_context, 0, sizeof(xd->left_context)); + vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); // Code each SB in the row for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; @@ -2359,7 +2494,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, BLOCK_SIZE i; MACROBLOCK *x = &cpi->mb; - if (cpi->sf.adaptive_pred_interp_filter) { + if (sf->adaptive_pred_interp_filter) { for (i = BLOCK_4X4; i < BLOCK_8X8; ++i) { const int num_4x4_w = num_4x4_blocks_wide_lookup[i]; const int num_4x4_h = num_4x4_blocks_high_lookup[i]; @@ 
-2373,63 +2508,69 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, vp9_zero(cpi->mb.pred_mv); - if ((cpi->sf.partition_search_type == SEARCH_PARTITION && - cpi->sf.use_lastframe_partitioning) || - cpi->sf.partition_search_type == FIXED_PARTITION || - cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) { - const int idx_str = cm->mode_info_stride * mi_row + mi_col; + if ((sf->partition_search_type == SEARCH_PARTITION && + sf->use_lastframe_partitioning) || + sf->partition_search_type == FIXED_PARTITION || + sf->partition_search_type == VAR_BASED_PARTITION || + sf->partition_search_type == VAR_BASED_FIXED_PARTITION) { + const int idx_str = cm->mi_stride * mi_row + mi_col; MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; cpi->mb.source_variance = UINT_MAX; - if (cpi->sf.partition_search_type == FIXED_PARTITION) { + if (sf->partition_search_type == FIXED_PARTITION) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, - cpi->sf.always_this_block_size); + sf->always_this_block_size); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); - } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) { + } else if (sf->partition_search_type == VAR_BASED_FIXED_PARTITION) { BLOCK_SIZE bsize; set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col); set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); - } else if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) { + } else if (sf->partition_search_type == VAR_BASED_PARTITION) { choose_partitioning(cpi, tile, mi_row, mi_col); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); } else { if 
((cm->current_video_frame - % cpi->sf.last_partitioning_redo_frequency) == 0 + % sf->last_partitioning_redo_frequency) == 0 || cm->prev_mi == 0 || cm->show_frame == 0 || cm->frame_type == KEY_FRAME || cpi->rc.is_src_frame_alt_ref - || ((cpi->sf.use_lastframe_partitioning == + || ((sf->use_lastframe_partitioning == LAST_FRAME_PARTITION_LOW_MOTION) && sb_has_motion(cm, prev_mi_8x8))) { // If required set upper and lower partition size limits - if (cpi->sf.auto_min_max_partition_size) { + if (sf->auto_min_max_partition_size) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); rd_auto_partition_range(cpi, tile, mi_row, mi_col, - &cpi->sf.min_partition_size, - &cpi->sf.max_partition_size); + &sf->min_partition_size, + &sf->max_partition_size); } rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, INT64_MAX); } else { - copy_partitioning(cm, mi_8x8, prev_mi_8x8); + if (sf->constrain_copy_partition && + sb_has_motion(cm, prev_mi_8x8)) + constrain_copy_partitioning(cpi, tile, mi_8x8, prev_mi_8x8, + mi_row, mi_col, BLOCK_16X16); + else + copy_partitioning(cm, mi_8x8, prev_mi_8x8); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); } } } else { // If required set upper and lower partition size limits - if (cpi->sf.auto_min_max_partition_size) { + if (sf->auto_min_max_partition_size) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); rd_auto_partition_range(cpi, tile, mi_row, mi_col, - &cpi->sf.min_partition_size, - &cpi->sf.max_partition_size); + &sf->min_partition_size, + &sf->max_partition_size); } rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, INT64_MAX); @@ -2444,9 +2585,6 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); x->act_zbin_adj = 0; - cpi->seg0_idx = 0; - - xd->mode_info_stride = cm->mode_info_stride; // Copy data over into macro block data structures. 
vp9_setup_src_planes(x, cpi->Source, 0, 0); @@ -2458,27 +2596,16 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); - xd->mi_8x8[0]->mbmi.mode = DC_PRED; - xd->mi_8x8[0]->mbmi.uv_mode = DC_PRED; - - vp9_zero(cm->counts.y_mode); - vp9_zero(cm->counts.uv_mode); - vp9_zero(cm->counts.inter_mode); - vp9_zero(cm->counts.partition); - vp9_zero(cm->counts.intra_inter); - vp9_zero(cm->counts.comp_inter); - vp9_zero(cm->counts.single_ref); - vp9_zero(cm->counts.comp_ref); - vp9_zero(cm->counts.tx); - vp9_zero(cm->counts.skip); + xd->mi[0]->mbmi.mode = DC_PRED; + xd->mi[0]->mbmi.uv_mode = DC_PRED; // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. - vpx_memset(cpi->above_context[0], 0, - sizeof(*cpi->above_context[0]) * + vpx_memset(xd->above_context[0], 0, + sizeof(*xd->above_context[0]) * 2 * aligned_mi_cols * MAX_MB_PLANE); - vpx_memset(cpi->above_seg_context, 0, - sizeof(*cpi->above_seg_context) * aligned_mi_cols); + vpx_memset(xd->above_seg_context, 0, + sizeof(*xd->above_seg_context) * aligned_mi_cols); } static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { @@ -2508,100 +2635,15 @@ static int check_dual_ref_flags(VP9_COMP *cpi) { } } -static int get_skip_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs) { - int x, y; - - for (y = 0; y < ymbs; y++) { - for (x = 0; x < xmbs; x++) { - if (!mi_8x8[y * mis + x]->mbmi.skip) - return 0; - } - } - - return 1; -} - -static void set_txfm_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs, - TX_SIZE tx_size) { - int x, y; - - for (y = 0; y < ymbs; y++) { - for (x = 0; x < xmbs; x++) - mi_8x8[y * mis + x]->mbmi.tx_size = tx_size; - } -} - -static void reset_skip_txfm_size_b(const VP9_COMMON *cm, int mis, - TX_SIZE max_tx_size, int bw, int bh, - int mi_row, int mi_col, - MODE_INFO **mi_8x8) { - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) { - return; - } else { - const 
MB_MODE_INFO *const mbmi = &mi_8x8[0]->mbmi; - if (mbmi->tx_size > max_tx_size) { - const int ymbs = MIN(bh, cm->mi_rows - mi_row); - const int xmbs = MIN(bw, cm->mi_cols - mi_col); - - assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) || - get_skip_flag(mi_8x8, mis, ymbs, xmbs)); - set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size); - } - } -} - -static void reset_skip_txfm_size_sb(VP9_COMMON *cm, MODE_INFO **mi_8x8, - TX_SIZE max_tx_size, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - const int mis = cm->mode_info_stride; - int bw, bh; - const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2; - - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) - return; - - bw = num_8x8_blocks_wide_lookup[mi_8x8[0]->mbmi.sb_type]; - bh = num_8x8_blocks_high_lookup[mi_8x8[0]->mbmi.sb_type]; - - if (bw == bs && bh == bs) { - reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, bs, mi_row, mi_col, - mi_8x8); - } else if (bw == bs && bh < bs) { - reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, hbs, mi_row, mi_col, - mi_8x8); - reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, hbs, mi_row + hbs, - mi_col, mi_8x8 + hbs * mis); - } else if (bw < bs && bh == bs) { - reset_skip_txfm_size_b(cm, mis, max_tx_size, hbs, bs, mi_row, mi_col, - mi_8x8); - reset_skip_txfm_size_b(cm, mis, max_tx_size, hbs, bs, mi_row, - mi_col + hbs, mi_8x8 + hbs); - } else { - const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize]; - int n; - - assert(bw < bs && bh < bs); - - for (n = 0; n < 4; n++) { - const int mi_dc = hbs * (n & 1); - const int mi_dr = hbs * (n >> 1); - - reset_skip_txfm_size_sb(cm, &mi_8x8[mi_dr * mis + mi_dc], max_tx_size, - mi_row + mi_dr, mi_col + mi_dc, subsize); - } - } -} - static void reset_skip_txfm_size(VP9_COMMON *cm, TX_SIZE txfm_max) { int mi_row, mi_col; - const int mis = cm->mode_info_stride; - MODE_INFO **mi_8x8, **mi_ptr = cm->mi_grid_visible; + const int mis = cm->mi_stride; + MODE_INFO **mi_ptr = cm->mi_grid_visible; - for (mi_row = 
0; mi_row < cm->mi_rows; mi_row += 8, mi_ptr += 8 * mis) { - mi_8x8 = mi_ptr; - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += 8, mi_8x8 += 8) { - reset_skip_txfm_size_sb(cm, mi_8x8, txfm_max, mi_row, mi_col, - BLOCK_64X64); + for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) { + for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) { + if (mi_ptr[mi_col]->mbmi.tx_size > txfm_max) + mi_ptr[mi_col]->mbmi.tx_size = txfm_max; } } } @@ -2680,16 +2722,347 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; set_offsets(cpi, tile, mi_row, mi_col, bsize); - xd->mi_8x8[0]->mbmi.sb_type = bsize; + xd->mi[0]->mbmi.sb_type = bsize; if (!frame_is_intra_only(cm)) { vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col, rate, dist, bsize); } else { MB_PREDICTION_MODE intramode = DC_PRED; - set_mode_info(&xd->mi_8x8[0]->mbmi, bsize, intramode); + set_mode_info(&xd->mi[0]->mbmi, bsize, intramode); + } + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); +} + +static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, + int mi_row, int mi_col, + BLOCK_SIZE bsize, BLOCK_SIZE subsize) { + MACROBLOCKD *xd = &x->e_mbd; + int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; + PARTITION_TYPE partition = partition_lookup[bsl][subsize]; + + assert(bsize >= BLOCK_8X8); + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + switch (partition) { + case PARTITION_NONE: + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + *(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); + break; + case PARTITION_VERT: + *get_sb_index(x, subsize) = 0; + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + *(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); + + if (mi_col + hbs < cm->mi_cols) { + *get_sb_index(x, subsize) = 1; + set_modeinfo_offsets(cm, xd, mi_row, mi_col + hbs); + 
*(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, bsize); + } + break; + case PARTITION_HORZ: + *get_sb_index(x, subsize) = 0; + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + *(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); + if (mi_row + hbs < cm->mi_rows) { + *get_sb_index(x, subsize) = 1; + set_modeinfo_offsets(cm, xd, mi_row + hbs, mi_col); + *(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, bsize); + } + break; + case PARTITION_SPLIT: + *get_sb_index(x, subsize) = 0; + fill_mode_info_sb(cm, x, mi_row, mi_col, subsize, + *(get_sb_partitioning(x, subsize))); + *get_sb_index(x, subsize) = 1; + fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize, + *(get_sb_partitioning(x, subsize))); + *get_sb_index(x, subsize) = 2; + fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize, + *(get_sb_partitioning(x, subsize))); + *get_sb_index(x, subsize) = 3; + fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize, + *(get_sb_partitioning(x, subsize))); + break; + default: + break; + } +} + +static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, + TOKENEXTRA **tp, int mi_row, + int mi_col, BLOCK_SIZE bsize, int *rate, + int64_t *dist, int do_recon, int64_t best_rd) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const int ms = num_8x8_blocks_wide_lookup[bsize] / 2; + TOKENEXTRA *tp_orig = *tp; + PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize); + int i; + BLOCK_SIZE subsize; + int this_rate, sum_rate = 0, best_rate = INT_MAX; + int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX; + int64_t sum_rd = 0; + int do_split = bsize >= BLOCK_8X8; + int do_rect = 1; + // Override skipping rectangular partition operations for edge blocks + const int force_horz_split = (mi_row + ms >= cm->mi_rows); + const int 
force_vert_split = (mi_col + ms >= cm->mi_cols); + const int xss = x->e_mbd.plane[1].subsampling_x; + const int yss = x->e_mbd.plane[1].subsampling_y; + + int partition_none_allowed = !force_horz_split && !force_vert_split; + int partition_horz_allowed = !force_vert_split && yss <= xss && + bsize >= BLOCK_8X8; + int partition_vert_allowed = !force_horz_split && xss <= yss && + bsize >= BLOCK_8X8; + (void) *tp_orig; + + if (bsize < BLOCK_8X8) { + // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 + // there is nothing to be done. + if (x->ab_index != 0) { + *rate = 0; + *dist = 0; + return; + } + } + + assert(num_8x8_blocks_wide_lookup[bsize] == + num_8x8_blocks_high_lookup[bsize]); + + x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); + + // Determine partition types in search according to the speed features. + // The threshold set here has to be of square block size. + if (cpi->sf.auto_min_max_partition_size) { + partition_none_allowed &= (bsize <= cpi->sf.max_partition_size && + bsize >= cpi->sf.min_partition_size); + partition_horz_allowed &= ((bsize <= cpi->sf.max_partition_size && + bsize > cpi->sf.min_partition_size) || + force_horz_split); + partition_vert_allowed &= ((bsize <= cpi->sf.max_partition_size && + bsize > cpi->sf.min_partition_size) || + force_vert_split); + do_split &= bsize > cpi->sf.min_partition_size; + } + if (cpi->sf.use_square_partition_only) { + partition_horz_allowed &= force_horz_split; + partition_vert_allowed &= force_vert_split; + } + + if (!x->in_active_map && (partition_horz_allowed || partition_vert_allowed)) + do_split = 0; + + // PARTITION_NONE + if (partition_none_allowed) { + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, + &this_rate, &this_dist, bsize); + ctx->mic.mbmi = xd->mi[0]->mbmi; + + if (this_rate != INT_MAX) { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + this_rate += x->partition_cost[pl][PARTITION_NONE]; + sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, 
this_dist); + if (sum_rd < best_rd) { + int64_t stop_thresh = 4096; + int64_t stop_thresh_rd; + + best_rate = this_rate; + best_dist = this_dist; + best_rd = sum_rd; + if (bsize >= BLOCK_8X8) + *(get_sb_partitioning(x, bsize)) = bsize; + + // Adjust threshold according to partition size. + stop_thresh >>= 8 - (b_width_log2_lookup[bsize] + + b_height_log2_lookup[bsize]); + + stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh); + // If obtained distortion is very small, choose current partition + // and stop splitting. + if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) { + do_split = 0; + do_rect = 0; + } + } + } + if (!x->in_active_map) { + do_split = 0; + do_rect = 0; + } + } + + // store estimated motion vector + store_pred_mv(x, ctx); + + // PARTITION_SPLIT + sum_rd = 0; + if (do_split) { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + sum_rate += x->partition_cost[pl][PARTITION_SPLIT]; + subsize = get_subsize(bsize, PARTITION_SPLIT); + for (i = 0; i < 4 && sum_rd < best_rd; ++i) { + const int x_idx = (i & 1) * ms; + const int y_idx = (i >> 1) * ms; + + if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) + continue; + + *get_sb_index(x, subsize) = i; + load_pred_mv(x, ctx); + + nonrd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, + subsize, &this_rate, &this_dist, 0, + best_rd - sum_rd); + + if (this_rate == INT_MAX) { + sum_rd = INT64_MAX; + } else { + sum_rate += this_rate; + sum_dist += this_dist; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + } + } + + if (sum_rd < best_rd) { + best_rate = sum_rate; + best_dist = sum_dist; + best_rd = sum_rd; + *(get_sb_partitioning(x, bsize)) = subsize; + } else { + // skip rectangular partition test when larger block size + // gives better rd cost + if (cpi->sf.less_rectangular_check) + do_rect &= !partition_none_allowed; + } + } + + // PARTITION_HORZ + if (partition_horz_allowed && do_rect) { + subsize = get_subsize(bsize, PARTITION_HORZ); + 
*get_sb_index(x, subsize) = 0; + if (cpi->sf.adaptive_motion_search) + load_pred_mv(x, ctx); + + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, + &this_rate, &this_dist, subsize); + + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + + if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) { + *get_sb_index(x, subsize) = 1; + + load_pred_mv(x, ctx); + + nonrd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col, + &this_rate, &this_dist, subsize); + + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + + if (this_rate == INT_MAX) { + sum_rd = INT64_MAX; + } else { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + this_rate += x->partition_cost[pl][PARTITION_HORZ]; + sum_rate += this_rate; + sum_dist += this_dist; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + } + } + if (sum_rd < best_rd) { + best_rd = sum_rd; + best_rate = sum_rate; + best_dist = sum_dist; + *(get_sb_partitioning(x, bsize)) = subsize; + } + } + + // PARTITION_VERT + if (partition_vert_allowed && do_rect) { + subsize = get_subsize(bsize, PARTITION_VERT); + + *get_sb_index(x, subsize) = 0; + if (cpi->sf.adaptive_motion_search) + load_pred_mv(x, ctx); + + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, + &this_rate, &this_dist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) { + *get_sb_index(x, subsize) = 1; + + load_pred_mv(x, ctx); + + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms, + &this_rate, &this_dist, subsize); + + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + + if (this_rate == INT_MAX) { + sum_rd = INT64_MAX; + } else { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + this_rate += x->partition_cost[pl][PARTITION_VERT]; + sum_rate += this_rate; + sum_dist += this_dist; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, 
sum_dist); + } + } + if (sum_rd < best_rd) { + best_rate = sum_rate; + best_dist = sum_dist; + best_rd = sum_rd; + *(get_sb_partitioning(x, bsize)) = subsize; + } + } + + *rate = best_rate; + *dist = best_dist; + + if (best_rate == INT_MAX) + return; + + // update mode info array + fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, + *(get_sb_partitioning(x, bsize))); + + if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon) { + int output_enabled = (bsize == BLOCK_64X64); + + // Check the projected output rate for this SB against it's target + // and and if necessary apply a Q delta using segmentation to get + // closer to the target. + if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { + vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, + best_rate); + } + + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + best_rate, best_dist); + + encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize); + } + + if (bsize == BLOCK_64X64) { + assert(tp_orig < *tp); + assert(best_rate < INT_MAX); + assert(best_dist < INT64_MAX); + } else { + assert(tp_orig == *tp); } - duplicate_modeinfo_in_sb(cm, xd, mi_row, mi_col, bsize); } static void nonrd_use_partition(VP9_COMP *cpi, @@ -2701,35 +3074,34 @@ static void nonrd_use_partition(VP9_COMP *cpi, int *totrate, int64_t *totdist) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; PARTITION_TYPE partition; BLOCK_SIZE subsize; - int rate; - int64_t dist; + int rate = INT_MAX; + int64_t dist = INT64_MAX; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - if (bsize >= BLOCK_8X8) { - subsize = mi_8x8[0]->mbmi.sb_type; - } else { - subsize = BLOCK_4X4; - } - + subsize = (bsize >= BLOCK_8X8) ? 
mi_8x8[0]->mbmi.sb_type : BLOCK_4X4; partition = partition_lookup[bsl][subsize]; switch (partition) { case PARTITION_NONE: nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; break; case PARTITION_VERT: *get_sb_index(x, subsize) = 0; nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; if (mi_col + hbs < cm->mi_cols) { *get_sb_index(x, subsize) = 1; nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs, &rate, &dist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; if (rate != INT_MAX && dist != INT64_MAX && *totrate != INT_MAX && *totdist != INT64_MAX) { *totrate += rate; @@ -2740,10 +3112,12 @@ static void nonrd_use_partition(VP9_COMP *cpi, case PARTITION_HORZ: *get_sb_index(x, subsize) = 0; nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; if (mi_row + hbs < cm->mi_rows) { *get_sb_index(x, subsize) = 1; nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col, &rate, &dist, subsize); + get_block_context(x, subsize)->mic.mbmi = mi_8x8[0]->mbmi; if (rate != INT_MAX && dist != INT64_MAX && *totrate != INT_MAX && *totdist != INT64_MAX) { *totrate += rate; @@ -2753,7 +3127,6 @@ static void nonrd_use_partition(VP9_COMP *cpi, break; case PARTITION_SPLIT: subsize = get_subsize(bsize, PARTITION_SPLIT); - *get_sb_index(x, subsize) = 0; nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, subsize, output_enabled, totrate, totdist); @@ -2790,10 +3163,9 @@ static void nonrd_use_partition(VP9_COMP *cpi, } if (bsize == BLOCK_64X64 && output_enabled) { - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - cpi->cyclic_refresh.projected_rate_sb = *totrate; - cpi->cyclic_refresh.projected_dist_sb = *totdist; - } + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + 
vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + *totrate, *totdist); encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, bsize); } } @@ -2801,132 +3173,102 @@ static void nonrd_use_partition(VP9_COMP *cpi, static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, TOKENEXTRA **tp) { VP9_COMMON *cm = &cpi->common; + MACROBLOCKD *xd = &cpi->mb.e_mbd; int mi_col; // Initialize the left context for the new SB row - vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context)); - vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context)); + vpx_memset(&xd->left_context, 0, sizeof(xd->left_context)); + vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); // Code each SB in the row for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { - int dummy_rate; - int64_t dummy_dist; - const int idx_str = cm->mode_info_stride * mi_row + mi_col; + int dummy_rate = 0; + int64_t dummy_dist = 0; + const int idx_str = cm->mi_stride * mi_row + mi_col; MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; - - BLOCK_SIZE bsize = cpi->sf.partition_search_type == FIXED_PARTITION ? 
- cpi->sf.always_this_block_size : - get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col); + BLOCK_SIZE bsize; cpi->mb.source_variance = UINT_MAX; + vp9_zero(cpi->mb.pred_mv); // Set the partition type of the 64X64 block - if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) - choose_partitioning(cpi, tile, mi_row, mi_col); - else if (cpi->sf.partition_search_type == REFERENCE_PARTITION) { - if (cpi->sf.partition_check) { - MACROBLOCK *x = &cpi->mb; - int rate1, rate2, rate3; - int64_t dist1, dist2, dist3; - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, BLOCK_8X8); + switch (cpi->sf.partition_search_type) { + case VAR_BASED_PARTITION: + choose_partitioning(cpi, tile, mi_row, mi_col); nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, - 0, &rate1, &dist1); - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, BLOCK_16X16); + 1, &dummy_rate, &dummy_dist); + break; + case SOURCE_VAR_BASED_PARTITION: + set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); + set_source_var_based_partition(cpi, tile, mi_8x8, mi_row, mi_col); nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, - 0, &rate2, &dist2); - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, BLOCK_32X32); + 1, &dummy_rate, &dummy_dist); + break; + case VAR_BASED_FIXED_PARTITION: + case FIXED_PARTITION: + bsize = cpi->sf.partition_search_type == FIXED_PARTITION ? 
+ cpi->sf.always_this_block_size : + get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col); + set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, - 0, &rate3, &dist3); - - if (RDCOST(x->rdmult, x->rddiv, rate1, dist1) < - RDCOST(x->rdmult, x->rddiv, rate2, dist2)) { - if (RDCOST(x->rdmult, x->rddiv, rate1, dist1) < - RDCOST(x->rdmult, x->rddiv, rate3, dist3)) - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, - BLOCK_8X8); - else - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, - BLOCK_32X32); + 1, &dummy_rate, &dummy_dist); + break; + case REFERENCE_PARTITION: + if (cpi->sf.partition_check || sb_has_motion(cm, prev_mi_8x8)) { + nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, + &dummy_rate, &dummy_dist, 1, INT64_MAX); } else { - if (RDCOST(x->rdmult, x->rddiv, rate2, dist2) < - RDCOST(x->rdmult, x->rddiv, rate3, dist3)) - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, - BLOCK_16X16); - else - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, - BLOCK_32X32); - } - - } else { - if (!sb_has_motion(cm, prev_mi_8x8)) copy_partitioning(cm, mi_8x8, prev_mi_8x8); - else - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); - } + nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, + BLOCK_64X64, 1, &dummy_rate, &dummy_dist); + } + break; + default: + assert(0); } - else - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); - - nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, 1, - &dummy_rate, &dummy_dist); } } // end RTC play code static void encode_frame_internal(VP9_COMP *cpi) { - int mi_row; + SPEED_FEATURES *const sf = &cpi->sf; MACROBLOCK *const x = &cpi->mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; -// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n", -// cpi->common.current_video_frame, cpi->common.show_frame, -// 
cm->frame_type); - - vp9_zero(cm->counts.switchable_interp); - vp9_zero(cpi->tx_stepdown_count); - - xd->mi_8x8 = cm->mi_grid_visible; - // required for vp9_frame_init_quantizer - xd->mi_8x8[0] = cm->mi; + xd->mi = cm->mi_grid_visible; + xd->mi[0] = cm->mi; - vp9_zero(cm->counts.mv); + vp9_zero(cm->counts); vp9_zero(cpi->coef_counts); - vp9_zero(cm->counts.eob_branch); + vp9_zero(cpi->tx_stepdown_count); + vp9_zero(cpi->rd_comp_pred_diff); + vp9_zero(cpi->rd_filter_diff); + vp9_zero(cpi->rd_tx_select_diff); + vp9_zero(cpi->rd_tx_select_threshes); - // Set frame level transform size use case cm->tx_mode = select_tx_mode(cpi); - cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 - && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; + cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && + cm->y_dc_delta_q == 0 && + cm->uv_dc_delta_q == 0 && + cm->uv_ac_delta_q == 0; switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless); vp9_frame_init_quantizer(cpi); vp9_initialize_rd_consts(cpi); vp9_initialize_me_consts(cpi, cm->base_qindex); - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { - // Initialize encode frame context. - init_encode_frame_mb_context(cpi); - - // Build a frame level activity map - build_activity_map(cpi); - } - - // Re-initialize encode frame context. init_encode_frame_mb_context(cpi); - vp9_zero(cpi->rd_comp_pred_diff); - vp9_zero(cpi->rd_filter_diff); - vp9_zero(cpi->rd_tx_select_diff); - vp9_zero(cpi->rd_tx_select_threshes); + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) + build_activity_map(cpi); - set_prev_mi(cm); + cm->prev_mi = get_prev_mi(cm); - if (cpi->sf.use_nonrd_pick_mode) { + if (sf->use_nonrd_pick_mode) { // Initialize internal buffer pointers for rtc coding, where non-RD // mode decision is used and hence no buffer pointer swap needed. 
int i; @@ -2941,6 +3283,29 @@ static void encode_frame_internal(VP9_COMP *cpi) { p[i].eobs = ctx->eobs_pbuf[i][0]; } vp9_zero(x->zcoeff_blk); + + if (cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION && + cm->current_video_frame > 0) { + int check_freq = cpi->sf.search_type_check_frequency; + + if ((cm->current_video_frame - 1) % check_freq == 0) { + cpi->use_large_partition_rate = 0; + } + + if ((cm->current_video_frame - 1) % check_freq == 1) { + const int mbs_in_b32x32 = 1 << ((b_width_log2_lookup[BLOCK_32X32] - + b_width_log2_lookup[BLOCK_16X16]) + + (b_height_log2_lookup[BLOCK_32X32] - + b_height_log2_lookup[BLOCK_16X16])); + cpi->use_large_partition_rate = cpi->use_large_partition_rate * 100 * + mbs_in_b32x32 / cm->MBs; + } + + if ((cm->current_video_frame - 1) % check_freq >= 1) { + if (cpi->use_large_partition_rate < 15) + cpi->sf.partition_search_type = FIXED_PARTITION; + } + } } { @@ -2958,12 +3323,13 @@ static void encode_frame_internal(VP9_COMP *cpi) { for (tile_col = 0; tile_col < tile_cols; tile_col++) { TileInfo tile; TOKENEXTRA *tp_old = tp; + int mi_row; // For each row of SBs in the frame vp9_tile_init(&tile, cm, tile_row, tile_col); for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end; mi_row += MI_BLOCK_SIZE) { - if (cpi->sf.use_nonrd_pick_mode && cm->frame_type != KEY_FRAME) + if (sf->use_nonrd_pick_mode && cm->frame_type != KEY_FRAME) encode_nonrd_sb_row(cpi, &tile, mi_row, &tp); else encode_rd_sb_row(cpi, &tile, mi_row, &tp); @@ -2978,18 +3344,18 @@ static void encode_frame_internal(VP9_COMP *cpi) { cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); } - if (cpi->sf.skip_encode_sb) { + if (sf->skip_encode_sb) { int j; unsigned int intra_count = 0, inter_count = 0; for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) { intra_count += cm->counts.intra_inter[j][0]; inter_count += cm->counts.intra_inter[j][1]; } - cpi->sf.skip_encode_frame = (intra_count << 2) < inter_count && - cm->frame_type != KEY_FRAME && - cm->show_frame; + 
sf->skip_encode_frame = (intra_count << 2) < inter_count && + cm->frame_type != KEY_FRAME && + cm->show_frame; } else { - cpi->sf.skip_encode_frame = 0; + sf->skip_encode_frame = 0; } #if 0 @@ -3023,33 +3389,31 @@ void vp9_encode_frame(VP9_COMP *cpi) { if (cpi->sf.frame_parameter_update) { int i; - REFERENCE_MODE reference_mode; - /* - * This code does a single RD pass over the whole frame assuming - * either compound, single or hybrid prediction as per whatever has - * worked best for that type of frame in the past. - * It also predicts whether another coding mode would have worked - * better that this coding mode. If that is the case, it remembers - * that for subsequent frames. - * It does the same analysis for transform size selection also. - */ + + // This code does a single RD pass over the whole frame assuming + // either compound, single or hybrid prediction as per whatever has + // worked best for that type of frame in the past. + // It also predicts whether another coding mode would have worked + // better than this coding mode. If that is the case, it remembers + // that for subsequent frames. + // It does the same analysis for transform size selection also. 
const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi); const int64_t *mode_thresh = cpi->rd_prediction_type_threshes[frame_type]; const int64_t *filter_thresh = cpi->rd_filter_threshes[frame_type]; /* prediction (compound, single or hybrid) mode selection */ - if (frame_type == 3 || !cm->allow_comp_inter_inter) - reference_mode = SINGLE_REFERENCE; + if (frame_type == ALTREF_FRAME || !cm->allow_comp_inter_inter) + cm->reference_mode = SINGLE_REFERENCE; else if (mode_thresh[COMPOUND_REFERENCE] > mode_thresh[SINGLE_REFERENCE] && mode_thresh[COMPOUND_REFERENCE] > mode_thresh[REFERENCE_MODE_SELECT] && check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100) - reference_mode = COMPOUND_REFERENCE; + cm->reference_mode = COMPOUND_REFERENCE; else if (mode_thresh[SINGLE_REFERENCE] > mode_thresh[REFERENCE_MODE_SELECT]) - reference_mode = SINGLE_REFERENCE; + cm->reference_mode = SINGLE_REFERENCE; else - reference_mode = REFERENCE_MODE_SELECT; + cm->reference_mode = REFERENCE_MODE_SELECT; if (cm->interp_filter == SWITCHABLE) { if (frame_type != ALTREF_FRAME && @@ -3065,9 +3429,6 @@ void vp9_encode_frame(VP9_COMP *cpi) { } } - cpi->mb.e_mbd.lossless = cpi->oxcf.lossless; - cm->reference_mode = reference_mode; - encode_frame_internal(cpi); for (i = 0; i < REFERENCE_MODES; ++i) { @@ -3146,10 +3507,8 @@ void vp9_encode_frame(VP9_COMP *cpi) { } } } else { - cpi->mb.e_mbd.lossless = cpi->oxcf.lossless; cm->reference_mode = SINGLE_REFERENCE; - // Force the usage of the BILINEAR interp_filter. 
- cm->interp_filter = BILINEAR; + cm->interp_filter = SWITCHABLE; encode_frame_internal(cpi); } } @@ -3214,19 +3573,20 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO **mi_8x8 = xd->mi_8x8; + MODE_INFO **mi_8x8 = xd->mi; MODE_INFO *mi = mi_8x8[0]; MB_MODE_INFO *mbmi = &mi->mbmi; PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize); unsigned int segment_id = mbmi->segment_id; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 && - (cpi->oxcf.aq_mode != COMPLEXITY_AQ && - cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ) && - !cpi->sf.use_nonrd_pick_mode; + cpi->oxcf.aq_mode != COMPLEXITY_AQ && + cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ && + cpi->sf.allow_skip_recode; + x->skip_optimize = ctx->is_coded; ctx->is_coded = 1; x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; @@ -3243,7 +3603,6 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, } } else { set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { // Adjust the zbin based on this MB rate. 
diff --git a/libvpx/vp9/encoder/vp9_encodeframe.h b/libvpx/vp9/encoder/vp9_encodeframe.h index 72343cd..131e932 100644 --- a/libvpx/vp9/encoder/vp9_encodeframe.h +++ b/libvpx/vp9/encoder/vp9_encodeframe.h @@ -20,6 +20,12 @@ struct macroblock; struct yv12_buffer_config; struct VP9_COMP; +typedef struct { + unsigned int sse; + int sum; + unsigned int var; +} diff; + void vp9_setup_src_planes(struct macroblock *x, const struct yv12_buffer_config *src, int mi_row, int mi_col); diff --git a/libvpx/vp9/encoder/vp9_encodemb.c b/libvpx/vp9/encoder/vp9_encodemb.c index fae03bf..5e98e4e 100644 --- a/libvpx/vp9/encoder/vp9_encodemb.c +++ b/libvpx/vp9/encoder/vp9_encodemb.c @@ -111,7 +111,7 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, MACROBLOCKD *const xd = &mb->e_mbd; struct macroblock_plane *p = &mb->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; - const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); + const int ref = is_inter_block(&xd->mi[0]->mbmi); vp9_token_state tokens[1025][2]; unsigned best_index[1025][2]; const int16_t *coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block); @@ -139,7 +139,7 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, /* Now set up a Viterbi trellis to evaluate alternative roundings. */ rdmult = mb->rdmult * err_mult; - if (!is_inter_block(&mb->e_mbd.mi_8x8[0]->mbmi)) + if (!is_inter_block(&mb->e_mbd.mi[0]->mbmi)) rdmult = (rdmult * 9) >> 4; rddiv = mb->rddiv; /* Initialize the sentinel node of the trellis. 
*/ @@ -452,7 +452,7 @@ void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) { void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { MACROBLOCKD *const xd = &x->e_mbd; struct optimize_ctx ctx; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct encode_b_args arg = {x, &ctx, &mbmi->skip}; int plane; @@ -477,7 +477,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, struct encode_b_args* const args = arg; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; int16_t *coeff = BLOCK_OFFSET(p->coeff, block); @@ -562,7 +562,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, case TX_4X4: tx_type = get_tx_type_4x4(pd->plane_type, xd, block); scan_order = &vp9_scan_orders[TX_4X4][tx_type]; - mode = plane == 0 ? get_y_mode(xd->mi_8x8[0], block) : mbmi->uv_mode; + mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode; vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, x->skip_encode ? src : dst, x->skip_encode ? 
src_stride : dst_stride, @@ -608,14 +608,14 @@ void vp9_encode_block_intra(MACROBLOCK *x, int plane, int block, void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { const MACROBLOCKD *const xd = &x->e_mbd; - struct encode_b_args arg = {x, NULL, &xd->mi_8x8[0]->mbmi.skip}; + struct encode_b_args arg = {x, NULL, &xd->mi[0]->mbmi.skip}; vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block_intra, &arg); } int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred) { - MB_MODE_INFO * mbmi = &x->e_mbd.mi_8x8[0]->mbmi; + MB_MODE_INFO * mbmi = &x->e_mbd.mi[0]->mbmi; x->skip_encode = 0; mbmi->mode = DC_PRED; mbmi->ref_frame[0] = INTRA_FRAME; diff --git a/libvpx/vp9/encoder/vp9_encodemv.c b/libvpx/vp9/encoder/vp9_encodemv.c index 2a10bbf..9d44865 100644 --- a/libvpx/vp9/encoder/vp9_encodemv.c +++ b/libvpx/vp9/encoder/vp9_encodemv.c @@ -242,7 +242,7 @@ static void inc_mvs(const MB_MODE_INFO *mbmi, const int_mv mvs[2], } void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd) { - const MODE_INFO *mi = xd->mi_8x8[0]; + const MODE_INFO *mi = xd->mi[0]; const MB_MODE_INFO *const mbmi = &mi->mbmi; if (mbmi->sb_type < BLOCK_8X8) { diff --git a/libvpx/vp9/encoder/vp9_firstpass.c b/libvpx/vp9/encoder/vp9_firstpass.c index c4c219b..db32ef8 100644 --- a/libvpx/vp9/encoder/vp9_firstpass.c +++ b/libvpx/vp9/encoder/vp9_firstpass.c @@ -23,6 +23,7 @@ #include "vp9/common/vp9_reconinter.h" // vp9_setup_dst_planes() #include "vp9/common/vp9_systemdependent.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_block.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemb.h" @@ -34,7 +35,6 @@ #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rdopt.h" -#include "vp9/encoder/vp9_vaq.h" #include "vp9/encoder/vp9_variance.h" #define OUTPUT_FPF 0 @@ -54,8 +54,6 @@ #define MIN_KF_BOOST 300 -#define DISABLE_RC_LONG_TERM_MEM 0 - #if CONFIG_MULTIPLE_ARF // Set 
MIN_GF_INTERVAL to 1 for the full decomposition. #define MIN_GF_INTERVAL 2 @@ -63,6 +61,8 @@ #define MIN_GF_INTERVAL 4 #endif +#define DISABLE_RC_LONG_TERM_MEM + static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) { YV12_BUFFER_CONFIG temp = *a; *a = *b; @@ -257,12 +257,22 @@ static void avg_stats(FIRSTPASS_STATS *section) { // harder frames. static double calculate_modified_err(const VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame) { - const struct twopass_rc *const twopass = &cpi->twopass; - const FIRSTPASS_STATS *const stats = &twopass->total_stats; - const double av_err = stats->ssim_weighted_pred_err / stats->count; - double modified_error = av_err * pow(this_frame->ssim_weighted_pred_err / - DOUBLE_DIVIDE_CHECK(av_err), - cpi->oxcf.two_pass_vbrbias / 100.0); + const struct twopass_rc *twopass = &cpi->twopass; + const SVC *const svc = &cpi->svc; + const FIRSTPASS_STATS *stats; + double av_err; + double modified_error; + + if (svc->number_spatial_layers > 1 && + svc->number_temporal_layers == 1) { + twopass = &svc->layer_context[svc->spatial_layer_id].twopass; + } + + stats = &twopass->total_stats; + av_err = stats->ssim_weighted_pred_err / stats->count; + modified_error = av_err * pow(this_frame->ssim_weighted_pred_err / + DOUBLE_DIVIDE_CHECK(av_err), + cpi->oxcf.two_pass_vbrbias / 100.0); return fclamp(modified_error, twopass->modified_error_min, twopass->modified_error_max); @@ -326,15 +336,13 @@ static double simple_weight(const YV12_BUFFER_CONFIG *buf) { } // This function returns the maximum target rate per frame. 
-static int frame_max_bits(const VP9_COMP *cpi) { - int64_t max_bits = - ((int64_t)cpi->rc.av_per_frame_bandwidth * - (int64_t)cpi->oxcf.two_pass_vbrmax_section) / 100; - +static int frame_max_bits(const RATE_CONTROL *rc, const VP9_CONFIG *oxcf) { + int64_t max_bits = ((int64_t)rc->av_per_frame_bandwidth * + (int64_t)oxcf->two_pass_vbrmax_section) / 100; if (max_bits < 0) max_bits = 0; - else if (max_bits > cpi->rc.max_frame_bandwidth) - max_bits = cpi->rc.max_frame_bandwidth; + else if (max_bits > rc->max_frame_bandwidth) + max_bits = rc->max_frame_bandwidth; return (int)max_bits; } @@ -375,7 +383,7 @@ static unsigned int zz_motion_search(const MACROBLOCK *x) { const uint8_t *const ref = xd->plane[0].pre[0].buf; const int ref_stride = xd->plane[0].pre[0].stride; unsigned int sse; - vp9_variance_fn_t fn = get_block_variance_fn(xd->mi_8x8[0]->mbmi.sb_type); + vp9_variance_fn_t fn = get_block_variance_fn(xd->mi[0]->mbmi.sb_type); fn(src, src_stride, ref, ref_stride, &sse); return sse; } @@ -389,7 +397,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int num00, tmp_err, n, sr = 0; int step_param = 3; int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; - const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type; + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize]; int new_mv_mode_penalty = 256; const int quart_frm = MIN(cpi->common.width, cpi->common.height); @@ -533,8 +541,8 @@ void vp9_first_pass(VP9_COMP *cpi) { vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL); vp9_setup_dst_planes(xd, new_yv12, 0, 0); - xd->mi_8x8 = cm->mi_grid_visible; - xd->mi_8x8[0] = cm->mi; + xd->mi = cm->mi_grid_visible; + xd->mi[0] = cm->mi; vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); @@ -582,8 +590,8 @@ void vp9_first_pass(VP9_COMP *cpi) { xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset; xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset; xd->left_available = 
(mb_col != 0); - xd->mi_8x8[0]->mbmi.sb_type = bsize; - xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME; + xd->mi[0]->mbmi.sb_type = bsize; + xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME; set_mi_row_col(xd, &tile, mb_row << 1, num_8x8_blocks_high_lookup[bsize], mb_col << 1, num_8x8_blocks_wide_lookup[bsize], @@ -702,11 +710,11 @@ void vp9_first_pass(VP9_COMP *cpi) { mv.as_mv.row *= 8; mv.as_mv.col *= 8; this_error = motion_error; - xd->mi_8x8[0]->mbmi.mode = NEWMV; - xd->mi_8x8[0]->mbmi.mv[0] = mv; - xd->mi_8x8[0]->mbmi.tx_size = TX_4X4; - xd->mi_8x8[0]->mbmi.ref_frame[0] = LAST_FRAME; - xd->mi_8x8[0]->mbmi.ref_frame[1] = NONE; + xd->mi[0]->mbmi.mode = NEWMV; + xd->mi[0]->mbmi.mv[0] = mv; + xd->mi[0]->mbmi.tx_size = TX_4X4; + xd->mi[0]->mbmi.ref_frame[0] = LAST_FRAME; + xd->mi[0]->mbmi.ref_frame[1] = NONE; vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize); vp9_encode_sby_pass1(x, bsize); sum_mvr += mv.as_mv.row; @@ -902,21 +910,21 @@ int vp9_twopass_worst_quality(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, const double section_err = fpstats->coded_error / fpstats->count; const double err_per_mb = section_err / num_mbs; + const double speed_term = 1.0 + ((double)cpi->speed * 0.04); if (section_target_bandwitdh <= 0) return rc->worst_quality; // Highest value allowed - target_norm_bits_per_mb = section_target_bandwitdh < (1 << 20) - ? (512 * section_target_bandwitdh) / num_mbs - : 512 * (section_target_bandwitdh / num_mbs); + target_norm_bits_per_mb = + ((uint64_t)section_target_bandwitdh << BPER_MB_NORMBITS) / num_mbs; // Try and pick a max Q that will be high enough to encode the // content at the given rate. 
for (q = rc->best_quality; q < rc->worst_quality; ++q) { const double err_correction_factor = calc_correction_factor(err_per_mb, ERR_DIVISOR, 0.5, 0.90, q); - const int bits_per_mb_at_this_q = vp9_rc_bits_per_mb(INTER_FRAME, q, - err_correction_factor); + const int bits_per_mb_at_this_q = + vp9_rc_bits_per_mb(INTER_FRAME, q, (err_correction_factor * speed_term)); if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) break; } @@ -931,10 +939,18 @@ int vp9_twopass_worst_quality(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, extern void vp9_new_framerate(VP9_COMP *cpi, double framerate); void vp9_init_second_pass(VP9_COMP *cpi) { + SVC *const svc = &cpi->svc; FIRSTPASS_STATS this_frame; const FIRSTPASS_STATS *start_pos; - struct twopass_rc *const twopass = &cpi->twopass; + struct twopass_rc *twopass = &cpi->twopass; const VP9_CONFIG *const oxcf = &cpi->oxcf; + const int is_spatial_svc = (svc->number_spatial_layers > 1) && + (svc->number_temporal_layers == 1); + double frame_rate; + + if (is_spatial_svc) { + twopass = &svc->layer_context[svc->spatial_layer_id].twopass; + } zero_stats(&twopass->total_stats); zero_stats(&twopass->total_left_stats); @@ -945,30 +961,44 @@ void vp9_init_second_pass(VP9_COMP *cpi) { twopass->total_stats = *twopass->stats_in_end; twopass->total_left_stats = twopass->total_stats; + frame_rate = 10000000.0 * twopass->total_stats.count / + twopass->total_stats.duration; // Each frame can have a different duration, as the frame rate in the source // isn't guaranteed to be constant. The frame rate prior to the first frame // encoded in the second pass is a guess. However, the sum duration is not. // It is calculated based on the actual durations of all frames from the // first pass. 
- vp9_new_framerate(cpi, 10000000.0 * twopass->total_stats.count / - twopass->total_stats.duration); + + if (is_spatial_svc) { + vp9_update_spatial_layer_framerate(cpi, frame_rate); + twopass->bits_left = + (int64_t)(twopass->total_stats.duration * + svc->layer_context[svc->spatial_layer_id].target_bandwidth / + 10000000.0); + } else { + vp9_new_framerate(cpi, frame_rate); + twopass->bits_left = (int64_t)(twopass->total_stats.duration * + oxcf->target_bandwidth / 10000000.0); + } cpi->output_framerate = oxcf->framerate; - twopass->bits_left = (int64_t)(twopass->total_stats.duration * - oxcf->target_bandwidth / 10000000.0); // Calculate a minimum intra value to be used in determining the IIratio // scores used in the second pass. We have this minimum to make sure // that clips that are static but "low complexity" in the intra domain // are still boosted appropriately for KF/GF/ARF. - twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; - twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; + if (!is_spatial_svc) { + // We don't know the number of MBs for each layer at this point. + // So we will do it later. + twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; + twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; + } // This variable monitors how far behind the second ref update is lagging. twopass->sr_update_lag = 1; - // Scan the first pass file and calculate an average Intra / Inter error score - // ratio for the sequence. + // Scan the first pass file and calculate an average Intra / Inter error + // score ratio for the sequence. { double sum_iiratio = 0.0; start_pos = twopass->stats_in; @@ -1027,8 +1057,8 @@ static double get_prediction_decay_rate(const VP9_COMMON *cm, // Function to test for a condition where a complex transition is followed // by a static section. For example in slide shows where there is a fade // between slides. This is to help with more optimal kf and gf positioning. 
-static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval, - int still_interval, +static int detect_transition_to_still(struct twopass_rc *twopass, + int frame_interval, int still_interval, double loop_decay_rate, double last_decay_rate) { int trans_to_still = 0; @@ -1040,19 +1070,19 @@ static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval, loop_decay_rate >= 0.999 && last_decay_rate < 0.9) { int j; - const FIRSTPASS_STATS *position = cpi->twopass.stats_in; + const FIRSTPASS_STATS *position = twopass->stats_in; FIRSTPASS_STATS tmp_next_frame; // Look ahead a few frames to see if static condition persists... for (j = 0; j < still_interval; ++j) { - if (EOF == input_stats(&cpi->twopass, &tmp_next_frame)) + if (EOF == input_stats(twopass, &tmp_next_frame)) break; if (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion < 0.999) break; } - reset_fpf_position(&cpi->twopass, position); + reset_fpf_position(twopass, position); // Only if it does do we signal a transition to still. if (j == still_interval) @@ -1374,9 +1404,11 @@ void define_fixed_arf_period(VP9_COMP *cpi) { // Analyse and define a gf/arf group. static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { + RATE_CONTROL *const rc = &cpi->rc; + VP9_CONFIG *const oxcf = &cpi->oxcf; + struct twopass_rc *const twopass = &cpi->twopass; FIRSTPASS_STATS next_frame = { 0 }; const FIRSTPASS_STATS *start_pos; - struct twopass_rc *const twopass = &cpi->twopass; int i; double boost_score = 0.0; double old_boost_score = 0.0; @@ -1395,16 +1427,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { double mv_in_out_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0; double mv_ratio_accumulator_thresh; - const int max_bits = frame_max_bits(cpi); // Max bits for a single frame. - - unsigned int allow_alt_ref = cpi->oxcf.play_alternate && - cpi->oxcf.lag_in_frames; + // Max bits for a single frame. 
+ const int max_bits = frame_max_bits(rc, oxcf); + unsigned int allow_alt_ref = oxcf->play_alternate && oxcf->lag_in_frames; int f_boost = 0; int b_boost = 0; int flash_detected; int active_max_gf_interval; - RATE_CONTROL *const rc = &cpi->rc; twopass->gf_group_bits = 0; @@ -1476,7 +1506,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Break clause to detect very still sections after motion. For example, // a static image after a fade or other transition. - if (detect_transition_to_still(cpi, i, 5, loop_decay_rate, + if (detect_transition_to_still(twopass, i, 5, loop_decay_rate, last_loop_decay_rate)) { allow_alt_ref = 0; break; @@ -1615,8 +1645,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Calculate the bits to be allocated to the group as a whole. if (twopass->kf_group_bits > 0 && twopass->kf_group_error_left > 0) { - twopass->gf_group_bits = (int64_t)(cpi->twopass.kf_group_bits * - (gf_group_err / cpi->twopass.kf_group_error_left)); + twopass->gf_group_bits = (int64_t)(twopass->kf_group_bits * + (gf_group_err / twopass->kf_group_error_left)); } else { twopass->gf_group_bits = 0; } @@ -1705,10 +1735,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { { // Adjust KF group bits and error remaining. twopass->kf_group_error_left -= (int64_t)gf_group_err; - twopass->kf_group_bits -= twopass->gf_group_bits; - - if (twopass->kf_group_bits < 0) - twopass->kf_group_bits = 0; // If this is an arf update we want to remove the score for the overlay // frame at the end which will usually be very cheap to code. 
@@ -1725,11 +1751,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { twopass->gf_group_error_left = (int64_t)gf_group_err; } - twopass->gf_group_bits -= twopass->gf_bits; - - if (twopass->gf_group_bits < 0) - twopass->gf_group_bits = 0; - // This condition could fail if there are two kfs very close together // despite MIN_GF_INTERVAL and would cause a divide by 0 in the // calculation of alt_extra_bits. @@ -1738,8 +1759,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (boost >= 150) { const int pct_extra = MIN(20, (boost - 100) / 50); - const int alt_extra_bits = (int)((twopass->gf_group_bits * pct_extra) / - 100); + const int alt_extra_bits = (int)(( + MAX(twopass->gf_group_bits - twopass->gf_bits, 0) * + pct_extra) / 100); twopass->gf_group_bits -= alt_extra_bits; } } @@ -1768,40 +1790,36 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Allocate bits to a normal frame that is neither a gf an arf or a key frame. static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { + struct twopass_rc *twopass = &cpi->twopass; + // For a single frame. + const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf); + // Calculate modified prediction error used in bit allocation. + const double modified_err = calculate_modified_err(cpi, this_frame); int target_frame_size; - double modified_err; double err_fraction; - const int max_bits = frame_max_bits(cpi); // Max for a single frame. - - // Calculate modified prediction error used in bit allocation. - modified_err = calculate_modified_err(cpi, this_frame); - if (cpi->twopass.gf_group_error_left > 0) + if (twopass->gf_group_error_left > 0) // What portion of the remaining GF group error is used by this frame. 
- err_fraction = modified_err / cpi->twopass.gf_group_error_left; + err_fraction = modified_err / twopass->gf_group_error_left; else err_fraction = 0.0; // How many of those bits available for allocation should we give it? - target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); + target_frame_size = (int)((double)twopass->gf_group_bits * err_fraction); // Clip target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at // the top end. target_frame_size = clamp(target_frame_size, 0, - MIN(max_bits, (int)cpi->twopass.gf_group_bits)); + MIN(max_bits, (int)twopass->gf_group_bits)); // Adjust error and bits remaining. - cpi->twopass.gf_group_error_left -= (int64_t)modified_err; - cpi->twopass.gf_group_bits -= target_frame_size; - - if (cpi->twopass.gf_group_bits < 0) - cpi->twopass.gf_group_bits = 0; + twopass->gf_group_error_left -= (int64_t)modified_err; // Per frame bit target for this frame. vp9_rc_set_frame_target(cpi, target_frame_size); } -static int test_candidate_kf(VP9_COMP *cpi, +static int test_candidate_kf(struct twopass_rc *twopass, const FIRSTPASS_STATS *last_frame, const FIRSTPASS_STATS *this_frame, const FIRSTPASS_STATS *next_frame) { @@ -1822,7 +1840,7 @@ static int test_candidate_kf(VP9_COMP *cpi, ((next_frame->intra_error / DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) > 3.5))))) { int i; - const FIRSTPASS_STATS *start_pos = cpi->twopass.stats_in; + const FIRSTPASS_STATS *start_pos = twopass->stats_in; FIRSTPASS_STATS local_next_frame = *next_frame; double boost_score = 0.0; double old_boost_score = 0.0; @@ -1859,7 +1877,7 @@ static int test_candidate_kf(VP9_COMP *cpi, old_boost_score = boost_score; // Get the next frame details - if (EOF == input_stats(&cpi->twopass, &local_next_frame)) + if (EOF == input_stats(twopass, &local_next_frame)) break; } @@ -1869,7 +1887,7 @@ static int test_candidate_kf(VP9_COMP *cpi, is_viable_kf = 1; } else { // Reset the file position - reset_fpf_position(&cpi->twopass, start_pos); + 
reset_fpf_position(twopass, start_pos); is_viable_kf = 0; } @@ -1882,16 +1900,13 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int i, j; RATE_CONTROL *const rc = &cpi->rc; struct twopass_rc *const twopass = &cpi->twopass; - FIRSTPASS_STATS last_frame; const FIRSTPASS_STATS first_frame = *this_frame; - FIRSTPASS_STATS next_frame; const FIRSTPASS_STATS *start_position = twopass->stats_in; - + FIRSTPASS_STATS next_frame; + FIRSTPASS_STATS last_frame; double decay_accumulator = 1.0; double zero_motion_accumulator = 1.0; - double boost_score = 0; - double loop_decay_rate; - + double boost_score = 0.0; double kf_mod_err = 0.0; double kf_group_err = 0.0; double recent_loop_decay[8] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; @@ -1929,8 +1944,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Provided that we are not at the end of the file... if (cpi->oxcf.auto_key && lookup_next_frame_stats(twopass, &next_frame) != EOF) { + double loop_decay_rate; + // Check for a scene cut. - if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame)) + if (test_candidate_kf(twopass, &last_frame, this_frame, &next_frame)) break; // How fast is the prediction quality decaying? @@ -1946,7 +1963,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Special check for transition or high motion followed by a // static scene. - if (detect_transition_to_still(cpi, i, cpi->key_frame_frequency - i, + if (detect_transition_to_still(twopass, i, cpi->key_frame_frequency - i, loop_decay_rate, decay_accumulator)) break; @@ -1999,7 +2016,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Calculate the number of bits that should be assigned to the kf group. if (twopass->bits_left > 0 && twopass->modified_error_left > 0.0) { // Maximum number of bits for a single normal frame (not key frame). 
- const int max_bits = frame_max_bits(cpi); + const int max_bits = frame_max_bits(rc, &cpi->oxcf); // Maximum number of bits allocated to the key frame group. int64_t max_grp_bits; @@ -2051,10 +2068,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // How fast is prediction quality decaying. if (!detect_flash(twopass, 0)) { - loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame); + const double loop_decay_rate = get_prediction_decay_rate(&cpi->common, + &next_frame); decay_accumulator *= loop_decay_rate; - decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR - ? MIN_DECAY_FACTOR : decay_accumulator; + decay_accumulator = MAX(decay_accumulator, MIN_DECAY_FACTOR); } boost_score += (decay_accumulator * r); @@ -2085,7 +2102,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (1) { int kf_boost = (int)boost_score; int allocation_chunks; - int alt_kf_bits; if (kf_boost < (rc->frames_to_key * 3)) kf_boost = (rc->frames_to_key * 3); @@ -2119,14 +2135,12 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Prevent overflow. if (kf_boost > 1028) { - int divisor = kf_boost >> 10; + const int divisor = kf_boost >> 10; kf_boost /= divisor; allocation_chunks /= divisor; } - twopass->kf_group_bits = (twopass->kf_group_bits < 0) ? 0 - : twopass->kf_group_bits; - + twopass->kf_group_bits = MAX(0, twopass->kf_group_bits); // Calculate the number of bits to be spent on the key frame. twopass->kf_bits = (int)((double)kf_boost * ((double)twopass->kf_group_bits / allocation_chunks)); @@ -2136,11 +2150,11 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // then use an alternate calculation based on the kf error score // which should give a smaller key frame. 
if (kf_mod_err < kf_group_err / rc->frames_to_key) { - double alt_kf_grp_bits = ((double)twopass->bits_left * + double alt_kf_grp_bits = ((double)twopass->bits_left * (kf_mod_err * (double)rc->frames_to_key) / DOUBLE_DIVIDE_CHECK(twopass->modified_error_left)); - alt_kf_bits = (int)((double)kf_boost * + const int alt_kf_bits = (int)((double)kf_boost * (alt_kf_grp_bits / (double)allocation_chunks)); if (twopass->kf_bits > alt_kf_bits) @@ -2149,12 +2163,11 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Else if it is much harder than other frames in the group make sure // it at least receives an allocation in keeping with its relative // error score. - alt_kf_bits = (int)((double)twopass->bits_left * (kf_mod_err / + const int alt_kf_bits = (int)((double)twopass->bits_left * (kf_mod_err / DOUBLE_DIVIDE_CHECK(twopass->modified_error_left))); - if (alt_kf_bits > twopass->kf_bits) { + if (alt_kf_bits > twopass->kf_bits) twopass->kf_bits = alt_kf_bits; - } } twopass->kf_group_bits -= twopass->kf_bits; // Per frame bit target for this frame. 
@@ -2187,14 +2200,24 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; struct twopass_rc *const twopass = &cpi->twopass; - const int frames_left = (int)(twopass->total_stats.count - - cm->current_video_frame); + int frames_left; FIRSTPASS_STATS this_frame; FIRSTPASS_STATS this_frame_copy; double this_frame_intra_error; double this_frame_coded_error; int target; + LAYER_CONTEXT *lc = NULL; + int is_spatial_svc = (cpi->use_svc && cpi->svc.number_temporal_layers == 1); + + if (is_spatial_svc) { + lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + frames_left = (int)(twopass->total_stats.count - + lc->current_video_frame_in_layer); + } else { + frames_left = (int)(twopass->total_stats.count - + cm->current_video_frame); + } if (!twopass->stats_in) return; @@ -2207,9 +2230,15 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { vp9_clear_system_state(); + if (is_spatial_svc && twopass->kf_intra_err_min == 0) { + twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; + twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; + } + if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) { twopass->active_worst_quality = cpi->oxcf.cq_level; - } else if (cm->current_video_frame == 0) { + } else if (cm->current_video_frame == 0 || + (is_spatial_svc && lc->current_video_frame_in_layer == 0)) { // Special case code for first frame. const int section_target_bandwidth = (int)(twopass->bits_left / frames_left); @@ -2232,6 +2261,11 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { // Define next KF group and assign bits to it. 
this_frame_copy = this_frame; find_next_key_frame(cpi, &this_frame_copy); + // Don't place key frame in any enhancement layers in spatial svc + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1 && + cpi->svc.spatial_layer_id > 0) { + cm->frame_type = INTER_FRAME; + } } else { cm->frame_type = INTER_FRAME; } @@ -2291,23 +2325,24 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { subtract_stats(&twopass->total_left_stats, &this_frame); } -void vp9_twopass_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { +void vp9_twopass_postencode_update(VP9_COMP *cpi) { #ifdef DISABLE_RC_LONG_TERM_MEM - cpi->twopass.bits_left -= cpi->rc.this_frame_target; + const uint64_t bits_used = cpi->rc.this_frame_target; #else - cpi->twopass.bits_left -= 8 * bytes_used; + const uint64_t bits_used = cpi->rc.projected_frame_size; +#endif + cpi->twopass.bits_left -= bits_used; + cpi->twopass.bits_left = MAX(cpi->twopass.bits_left, 0); // Update bits left to the kf and gf groups to account for overshoot or // undershoot on these frames. - if (cm->frame_type == KEY_FRAME) { - cpi->twopass.kf_group_bits += cpi->rc.this_frame_target - - cpi->rc.projected_frame_size; - - cpi->twopass.kf_group_bits = MAX(cpi->twopass.kf_group_bits, 0); - } else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) { - cpi->twopass.gf_group_bits += cpi->rc.this_frame_target - - cpi->rc.projected_frame_size; - + if (cpi->common.frame_type == KEY_FRAME) { + // For key frames kf_group_bits already had the target bits subtracted out. + // So now update to the correct value based on the actual bits used. 
+ cpi->twopass.kf_group_bits += cpi->rc.this_frame_target - bits_used; + } else { + cpi->twopass.kf_group_bits -= bits_used; + cpi->twopass.gf_group_bits -= bits_used; cpi->twopass.gf_group_bits = MAX(cpi->twopass.gf_group_bits, 0); } -#endif + cpi->twopass.kf_group_bits = MAX(cpi->twopass.kf_group_bits, 0); } diff --git a/libvpx/vp9/encoder/vp9_firstpass.h b/libvpx/vp9/encoder/vp9_firstpass.h index bf7b5a1..7a16c8f 100644 --- a/libvpx/vp9/encoder/vp9_firstpass.h +++ b/libvpx/vp9/encoder/vp9_firstpass.h @@ -35,7 +35,7 @@ typedef struct { double new_mv_count; double duration; double count; - int spatial_layer_id; + int64_t spatial_layer_id; } FIRSTPASS_STATS; struct twopass_rc { @@ -95,8 +95,7 @@ int vp9_twopass_worst_quality(struct VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, int section_target_bandwitdh); // Post encode update of the rate control parameters for 2-pass -void vp9_twopass_postencode_update(struct VP9_COMP *cpi, - uint64_t bytes_used); +void vp9_twopass_postencode_update(struct VP9_COMP *cpi); #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vp9/encoder/vp9_lookahead.c b/libvpx/vp9/encoder/vp9_lookahead.c index a88d5ec..cf03e01 100644 --- a/libvpx/vp9/encoder/vp9_lookahead.c +++ b/libvpx/vp9/encoder/vp9_lookahead.c @@ -28,8 +28,8 @@ struct lookahead_ctx { /* Return the buffer at the given absolute index and increment the index */ -static struct lookahead_entry * pop(struct lookahead_ctx *ctx, - unsigned int *idx) { +static struct lookahead_entry *pop(struct lookahead_ctx *ctx, + unsigned int *idx) { unsigned int index = *idx; struct lookahead_entry *buf = ctx->buf + index; @@ -55,16 +55,19 @@ void vp9_lookahead_destroy(struct lookahead_ctx *ctx) { } -struct lookahead_ctx * vp9_lookahead_init(unsigned int width, - unsigned int height, - unsigned int subsampling_x, - unsigned int subsampling_y, - unsigned int depth) { +struct lookahead_ctx *vp9_lookahead_init(unsigned int width, + unsigned int height, + unsigned int subsampling_x, + unsigned 
int subsampling_y, + unsigned int depth) { struct lookahead_ctx *ctx = NULL; // Clamp the lookahead queue depth depth = clamp(depth, 1, MAX_LAG_BUFFERS); + // Allocate memory to keep previous source frames available. + depth += MAX_PRE_FRAMES; + // Allocate the lookahead structures ctx = calloc(1, sizeof(*ctx)); if (ctx) { @@ -96,7 +99,7 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, int mb_cols = (src->y_width + 15) >> 4; #endif - if (ctx->sz + 1 > ctx->max_sz) + if (ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz) return 1; ctx->sz++; buf = pop(ctx, &ctx->write_idx); @@ -159,11 +162,11 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, } -struct lookahead_entry * vp9_lookahead_pop(struct lookahead_ctx *ctx, - int drain) { +struct lookahead_entry *vp9_lookahead_pop(struct lookahead_ctx *ctx, + int drain) { struct lookahead_entry *buf = NULL; - if (ctx->sz && (drain || ctx->sz == ctx->max_sz)) { + if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) { buf = pop(ctx, &ctx->read_idx); ctx->sz--; } @@ -171,16 +174,28 @@ struct lookahead_entry * vp9_lookahead_pop(struct lookahead_ctx *ctx, } -struct lookahead_entry * vp9_lookahead_peek(struct lookahead_ctx *ctx, - int index) { +struct lookahead_entry *vp9_lookahead_peek(struct lookahead_ctx *ctx, + int index) { struct lookahead_entry *buf = NULL; - if (index < (int)ctx->sz) { - index += ctx->read_idx; - if (index >= (int)ctx->max_sz) - index -= ctx->max_sz; - buf = ctx->buf + index; + if (index >= 0) { + // Forward peek + if (index < (int)ctx->sz) { + index += ctx->read_idx; + if (index >= (int)ctx->max_sz) + index -= ctx->max_sz; + buf = ctx->buf + index; + } + } else if (index < 0) { + // Backward peek + if (-index <= MAX_PRE_FRAMES) { + index += ctx->read_idx; + if (index < 0) + index += ctx->max_sz; + buf = ctx->buf + index; + } } + return buf; } diff --git a/libvpx/vp9/encoder/vp9_lookahead.h b/libvpx/vp9/encoder/vp9_lookahead.h index 
ff63c0d..046c533 100644 --- a/libvpx/vp9/encoder/vp9_lookahead.h +++ b/libvpx/vp9/encoder/vp9_lookahead.h @@ -20,6 +20,9 @@ extern "C" { #define MAX_LAG_BUFFERS 25 +// The max of past frames we want to keep in the queue. +#define MAX_PRE_FRAMES 1 + struct lookahead_entry { YV12_BUFFER_CONFIG img; int64_t ts_start; diff --git a/libvpx/vp9/encoder/vp9_mbgraph.c b/libvpx/vp9/encoder/vp9_mbgraph.c index 6520389..44b171f 100644 --- a/libvpx/vp9/encoder/vp9_mbgraph.c +++ b/libvpx/vp9/encoder/vp9_mbgraph.c @@ -61,8 +61,8 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, &sse); } - xd->mi_8x8[0]->mbmi.mode = NEWMV; - xd->mi_8x8[0]->mbmi.mv[0].as_mv = *dst_mv; + xd->mi[0]->mbmi.mode = NEWMV; + xd->mi[0]->mbmi.mv[0].as_mv = *dst_mv; vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16); @@ -145,7 +145,7 @@ static int find_best_16x16_intra(VP9_COMP *cpi, for (mode = DC_PRED; mode <= TM_PRED; mode++) { unsigned int err; - xd->mi_8x8[0]->mbmi.mode = mode; + xd->mi[0]->mbmi.mode = mode; vp9_predict_intra_block(xd, 0, 2, TX_16X16, mode, x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf, xd->plane[0].dst.stride, @@ -252,7 +252,7 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, xd->plane[0].dst.stride = buf->y_stride; xd->plane[0].pre[0].stride = buf->y_stride; xd->plane[1].dst.stride = buf->uv_stride; - xd->mi_8x8[0] = &mi_local; + xd->mi[0] = &mi_local; mi_local.mbmi.sb_type = BLOCK_16X16; mi_local.mbmi.ref_frame[0] = LAST_FRAME; mi_local.mbmi.ref_frame[1] = NONE; @@ -370,7 +370,6 @@ static void separate_arf_mbs(VP9_COMP *cpi) { else cpi->static_mb_pct = 0; - cpi->seg0_cnt = ncnt[0]; vp9_enable_segmentation(&cm->seg); } else { cpi->static_mb_pct = 0; diff --git a/libvpx/vp9/encoder/vp9_mcomp.c b/libvpx/vp9/encoder/vp9_mcomp.c index 2ae8a2a..f7a02a4 100644 --- a/libvpx/vp9/encoder/vp9_mcomp.c +++ b/libvpx/vp9/encoder/vp9_mcomp.c @@ -23,6 +23,11 @@ // #define NEW_DIAMOND_SEARCH +static INLINE const uint8_t *get_buf_from_mv(const 
struct buf_2d *buf, + const MV *mv) { + return &buf->buf[mv->row * buf->stride + mv->col]; +} + void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) { int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); @@ -370,9 +375,9 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x, unsigned int sse; unsigned int whichdir; int thismse; - unsigned int halfiters = iters_per_step; - unsigned int quarteriters = iters_per_step; - unsigned int eighthiters = iters_per_step; + const unsigned int halfiters = iters_per_step; + const unsigned int quarteriters = iters_per_step; + const unsigned int eighthiters = iters_per_step; DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); const int y_stride = xd->plane[0].pre[0].stride; @@ -399,7 +404,7 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x, // calculate central point error // TODO(yunqingwang): central pointer error was already calculated in full- // pixel search, and can be passed in this function. 
- comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); + vp9_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); @@ -495,8 +500,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, - int do_init_search, - int do_refine, + int do_init_search, int do_refine, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv, @@ -508,20 +512,15 @@ static int vp9_pattern_search(const MACROBLOCK *x, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, }; int i, j, s, t; - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const int in_what_stride = xd->plane[0].pre[0].stride; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; int br, bc; - MV this_mv; int bestsad = INT_MAX; int thissad; - const uint8_t *base_offset; - const uint8_t *this_offset; int k = -1; - int best_site = -1; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; int best_init_s = search_param_to_steps[search_param]; - const int *mvjsadcost = x->nmvjointsadcost; + const int *const mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; // adjust ref_mv to make sure it is within MV range @@ -530,13 +529,10 @@ static int vp9_pattern_search(const MACROBLOCK *x, bc = ref_mv->col; // Work out the start point for the search - base_offset = xd->plane[0].pre[0].buf; - this_offset = base_offset + (br * in_what_stride) + bc; - this_mv.row = br; - this_mv.col = bc; - bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff) - + mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + bestsad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, + 0x7fffffff) + 
mvsad_err_cost(ref_mv, &fcenter_mv, + mvjsadcost, mvsadcost, sad_per_bit); // Search all possible scales upto the search param around the center point // pick the scale of the point that is best as the starting scale of @@ -545,27 +541,25 @@ static int vp9_pattern_search(const MACROBLOCK *x, s = best_init_s; best_init_s = -1; for (t = 0; t <= s; ++t) { - best_site = -1; + int best_site = -1; if (check_bounds(x, br, bc, 1 << t)) { for (i = 0; i < num_candidates[t]; i++) { - this_mv.row = br + candidates[t][i].row; - this_mv.col = bc + candidates[t][i].col; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + const MV this_mv = {br + candidates[t][i].row, + bc + candidates[t][i].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < num_candidates[t]; i++) { - this_mv.row = br + candidates[t][i].row; - this_mv.col = bc + candidates[t][i].col; + const MV this_mv = {br + candidates[t][i].row, + bc + candidates[t][i].col}; if (!is_mv_in(x, &this_mv)) continue; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } @@ -585,31 +579,30 @@ static int vp9_pattern_search(const MACROBLOCK *x, // If the center point is still the best, just skip this and move to // the refinement step. 
if (best_init_s != -1) { + int best_site = -1; s = best_init_s; - best_site = -1; + do { // No need to search all 6 points the 1st time if initial search was used if (!do_init_search || s != best_init_s) { if (check_bounds(x, br, bc, 1 << s)) { for (i = 0; i < num_candidates[s]; i++) { - this_mv.row = br + candidates[s][i].row; - this_mv.col = bc + candidates[s][i].col; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + const MV this_mv = {br + candidates[s][i].row, + bc + candidates[s][i].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < num_candidates[s]; i++) { - this_mv.row = br + candidates[s][i].row; - this_mv.col = bc + candidates[s][i].col; + const MV this_mv = {br + candidates[s][i].row, + bc + candidates[s][i].col}; if (!is_mv_in(x, &this_mv)) continue; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } @@ -632,24 +625,22 @@ static int vp9_pattern_search(const MACROBLOCK *x, if (check_bounds(x, br, bc, 1 << s)) { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { - this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; - this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; - this_offset = base_offset + (this_mv.row * (in_what_stride)) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, + bc + candidates[s][next_chkpts_indices[i]].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } else { for (i = 0; 
i < PATTERN_CANDIDATES_REF; i++) { - this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; - this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; + const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, + bc + candidates[s][next_chkpts_indices[i]].col}; if (!is_mv_in(x, &this_mv)) continue; - this_offset = base_offset + (this_mv.row * (in_what_stride)) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } @@ -666,29 +657,28 @@ static int vp9_pattern_search(const MACROBLOCK *x, // Check 4 1-away neighbors if do_refine is true. // For most well-designed schemes do_refine will not be necessary. if (do_refine) { - static const MV neighbors[4] = { {0, -1}, { -1, 0}, {1, 0}, {0, 1} }; + static const MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}}; + for (j = 0; j < 16; j++) { - best_site = -1; + int best_site = -1; if (check_bounds(x, br, bc, 1)) { for (i = 0; i < 4; i++) { - this_mv.row = br + neighbors[i].row; - this_mv.col = bc + neighbors[i].col; - this_offset = base_offset + this_mv.row * in_what_stride + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + const MV this_mv = {br + neighbors[i].row, + bc + neighbors[i].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < 4; i++) { - this_mv.row = br + neighbors[i].row; - this_mv.col = bc + neighbors[i].col; + const MV this_mv = {br + neighbors[i].row, + bc + neighbors[i].col}; if (!is_mv_in(x, &this_mv)) continue; - this_offset = base_offset + this_mv.row * in_what_stride + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + 
in_what->stride, bestsad); CHECK_BETTER } } @@ -705,8 +695,6 @@ static int vp9_pattern_search(const MACROBLOCK *x, best_mv->row = br; best_mv->col = bc; - this_mv.row = best_mv->row * 8; - this_mv.col = best_mv->col * 8; return bestsad; } @@ -714,41 +702,32 @@ int vp9_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv, const MV *center_mv, const vp9_variance_fn_ptr_t *vfp, int use_mvcost) { - unsigned int unused; - const MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *base_offset = xd->plane[0].pre[0].buf; - const uint8_t *this_offset = &base_offset[best_mv->row * in_what_stride + - best_mv->col]; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV mv = {best_mv->row * 8, best_mv->col * 8}; - return vfp->vf(what, what_stride, this_offset, in_what_stride, &unused) + + unsigned int unused; + + return vfp->vf(what->buf, what->stride, + get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) + (use_mvcost ? 
mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, x->errorperbit) : 0); } int vp9_get_mvpred_av_var(const MACROBLOCK *x, - MV *best_mv, - const MV *center_mv, + const MV *best_mv, const MV *center_mv, const uint8_t *second_pred, const vp9_variance_fn_ptr_t *vfp, int use_mvcost) { - unsigned int bestsad; - MV this_mv; const MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *base_offset = xd->plane[0].pre[0].buf; - const uint8_t *this_offset = base_offset + (best_mv->row * in_what_stride) + - best_mv->col; - this_mv.row = best_mv->row * 8; - this_mv.col = best_mv->col * 8; - return vfp->svaf(this_offset, in_what_stride, 0, 0, what, what_stride, - &bestsad, second_pred) + - (use_mvcost ? mv_err_cost(&this_mv, center_mv, x->nmvjointcost, + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; + const MV mv = {best_mv->row * 8, best_mv->col * 8}; + unsigned int unused; + + return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0, + what->buf, what->stride, &unused, second_pred) + + (use_mvcost ? 
mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, x->errorperbit) : 0); } @@ -908,7 +887,6 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, const int what_stride = x->plane[0].src.stride; const uint8_t *in_what; const int in_what_stride = xd->plane[0].pre[0].stride; - MV this_mv; unsigned int bestsad = INT_MAX; int ref_row, ref_col; @@ -960,8 +938,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, for (i = 0; i < 4; ++i) { if (sad_array[i] < bestsad) { - this_mv.row = ref_row + tr; - this_mv.col = ref_col + tc + i; + const MV this_mv = {ref_row + tr, ref_col + tc + i}; thissad = sad_array[i] + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); @@ -979,8 +956,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, bestsad); if (thissad < bestsad) { - this_mv.row = ref_row + tr; - this_mv.col = ref_col + tc + i; + const MV this_mv = {ref_row + tr, ref_col + tc + i}; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); @@ -1005,66 +981,49 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, const vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], const MV *center_mv) { - int i, j, step; - const MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *in_what; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *best_address; - - int bestsad = INT_MAX; - int best_site = 0; - int last_site = 0; - - int ref_row, ref_col; - + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; // search_param determines the length of the initial step and hence the number // of iterations // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = // (MAX_FIRST_STEP/4) pel... etc. 
const search_site *const ss = &x->ss[search_param * x->searches_per_step]; const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; - const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; + const uint8_t *best_address; + int best_sad = INT_MAX; + int best_site = 0; + int last_site = 0; + int i, j, step; clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); - ref_row = ref_mv->row; - ref_col = ref_mv->col; + best_address = get_buf_from_mv(in_what, ref_mv); *num00 = 0; - best_mv->row = ref_row; - best_mv->col = ref_col; - - // Work out the start point for the search - in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; - best_address = in_what; + *best_mv = *ref_mv; // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + best_sad = fn_ptr->sdf(what->buf, what->stride, + in_what->buf, in_what->stride, 0x7fffffff) + + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); i = 1; for (step = 0; step < tot_steps; step++) { for (j = 0; j < x->searches_per_step; j++) { - const MV this_mv = {best_mv->row + ss[i].mv.row, - best_mv->col + ss[i].mv.col}; - if (is_mv_in(x, &this_mv)) { - const uint8_t *const check_here = ss[i].offset + best_address; - int thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); - - if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; + const MV mv = {best_mv->row + ss[i].mv.row, + best_mv->col + ss[i].mv.col}; + if (is_mv_in(x, &mv)) { + int sad = fn_ptr->sdf(what->buf, what->stride, + best_address + ss[i].offset, in_what->stride, + best_sad); + if (sad < best_sad) { + sad += 
mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, + sad_per_bit); + if (sad < best_sad) { + best_sad = sad; best_site = i; } } @@ -1083,14 +1042,14 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, const MV this_mv = {best_mv->row + ss[best_site].mv.row, best_mv->col + ss[best_site].mv.col}; if (is_mv_in(x, &this_mv)) { - const uint8_t *const check_here = ss[best_site].offset + best_address; - int thissad = fn_ptr->sdf(what, what_stride, check_here, - in_what_stride, bestsad); - if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - if (thissad < bestsad) { - bestsad = thissad; + int sad = fn_ptr->sdf(what->buf, what->stride, + best_address + ss[best_site].offset, + in_what->stride, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&this_mv, &fcenter_mv, + mvjsadcost, mvsadcost, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; best_mv->row += ss[best_site].mv.row; best_mv->col += ss[best_site].mv.col; best_address += ss[best_site].offset; @@ -1101,11 +1060,11 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, break; }; #endif - } else if (best_address == in_what) { + } else if (best_address == in_what->buf) { (*num00)++; } } - return bestsad; + return best_sad; } int vp9_diamond_search_sadx4(const MACROBLOCK *x, @@ -1331,10 +1290,8 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, const MV *center_mv, MV *best_mv) { int r, c; const MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *const what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *const in_what = xd->plane[0].pre[0].buf; - const int in_what_stride = xd->plane[0].pre[0].stride; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); const int col_min = MAX(ref_mv->col - 
distance, x->mv_col_min); @@ -1342,25 +1299,22 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride + - ref_mv->col]; - int best_sad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, - 0x7fffffff) + + int best_sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); *best_mv = *ref_mv; for (r = row_min; r < row_max; ++r) { for (c = col_min; c < col_max; ++c) { - const MV this_mv = {r, c}; - const uint8_t *check_here = &in_what[r * in_what_stride + c]; - const int sad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - best_sad) + - mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + const MV mv = {r, c}; + const int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) + + mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, + sad_per_bit); if (sad < best_sad) { best_sad = sad; - *best_mv = this_mv; + *best_mv = mv; } } } @@ -1472,7 +1426,6 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, MV this_mv; unsigned int bestsad = INT_MAX; int r, c; - unsigned int thissad; int ref_row = ref_mv->row; int ref_col = ref_mv->col; @@ -1512,7 +1465,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); for (i = 0; i < 8; i++) { - thissad = (unsigned int)sad_array8[i]; + unsigned int thissad = (unsigned int)sad_array8[i]; if (thissad < bestsad) { this_mv.col = c; @@ -1537,12 +1490,12 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, fn_ptr->sdx3f(what, what_stride, check_here, 
in_what_stride, sad_array); for (i = 0; i < 3; i++) { - thissad = sad_array[i]; + unsigned int thissad = sad_array[i]; if (thissad < bestsad) { this_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1557,8 +1510,8 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, } while (c < col_max) { - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); + unsigned int thissad = fn_ptr->sdf(what, what_stride, + check_here, in_what_stride, bestsad); if (thissad < bestsad) { this_mv.col = c; @@ -1585,41 +1538,34 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, const vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], const MV *center_mv) { - const MACROBLOCKD *const xd = &x->e_mbd; const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; - int i, j; - - const int what_stride = x->plane[0].src.stride; - const uint8_t *const what = x->plane[0].src.buf; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *const in_what = xd->plane[0].pre[0].buf; - const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride + - ref_mv->col]; + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - unsigned int bestsad = fn_ptr->sdf(what, what_stride, best_address, - in_what_stride, 0x7fffffff) + + unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), + in_what->stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + int i, j; for (i = 0; i < search_range; i++) { int 
best_site = -1; for (j = 0; j < 4; j++) { - const MV this_mv = {ref_mv->row + neighbors[j].row, - ref_mv->col + neighbors[j].col}; - if (is_mv_in(x, &this_mv)) { - const uint8_t *check_here = &in_what[this_mv.row * in_what_stride + - this_mv.col]; - unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, - in_what_stride, bestsad); - if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, error_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; + if (is_mv_in(x, &mv)) { + unsigned int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, + error_per_bit); + if (sad < best_sad) { + best_sad = sad; best_site = j; } } @@ -1633,7 +1579,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, ref_mv->col += neighbors[best_site].col; } } - return bestsad; + return best_sad; } int vp9_refining_search_sadx4(const MACROBLOCK *x, @@ -1643,74 +1589,64 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, int *mvjcost, int *mvcost[2], const MV *center_mv) { const MACROBLOCKD *const xd = &x->e_mbd; - MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; - int i, j; - - const int what_stride = x->plane[0].src.stride; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *what = x->plane[0].src.buf; - const uint8_t *best_address = xd->plane[0].pre[0].buf + - (ref_mv->row * xd->plane[0].pre[0].stride) + - ref_mv->col; - + const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - - unsigned int 
bestsad = fn_ptr->sdf(what, what_stride, best_address, - in_what_stride, 0x7fffffff) + + const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); + unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address, + in_what->stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + int i, j; for (i = 0; i < search_range; i++) { int best_site = -1; - int all_in = ((ref_mv->row - 1) > x->mv_row_min) & - ((ref_mv->row + 1) < x->mv_row_max) & - ((ref_mv->col - 1) > x->mv_col_min) & - ((ref_mv->col + 1) < x->mv_col_max); + const int all_in = ((ref_mv->row - 1) > x->mv_row_min) & + ((ref_mv->row + 1) < x->mv_row_max) & + ((ref_mv->col - 1) > x->mv_col_min) & + ((ref_mv->col + 1) < x->mv_col_max); if (all_in) { - unsigned int sad_array[4]; - uint8_t const *block_offset[4] = { - best_address - in_what_stride, + unsigned int sads[4]; + const uint8_t *const positions[4] = { + best_address - in_what->stride, best_address - 1, best_address + 1, - best_address + in_what_stride + best_address + in_what->stride }; - fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, - sad_array); + fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads); - for (j = 0; j < 4; j++) { - if (sad_array[j] < bestsad) { - const MV this_mv = {ref_mv->row + neighbors[j].row, - ref_mv->col + neighbors[j].col}; - sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, + for (j = 0; j < 4; ++j) { + if (sads[j] < best_sad) { + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; + sads[j] += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); - if (sad_array[j] < bestsad) { - bestsad = sad_array[j]; + if (sads[j] < best_sad) { + best_sad = sads[j]; best_site = j; } } } } else { - for (j = 0; j < 4; j++) { - const MV this_mv = {ref_mv->row + neighbors[j].row, - ref_mv->col + neighbors[j].col}; - - if (is_mv_in(x, &this_mv)) { - const uint8_t *check_here = neighbors[j].row * 
in_what_stride + - neighbors[j].col + best_address; - unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, - in_what_stride, bestsad); - - if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, error_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; + for (j = 0; j < 4; ++j) { + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; + + if (is_mv_in(x, &mv)) { + unsigned int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), + in_what->stride, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&mv, &fcenter_mv, + mvjsadcost, mvsadcost, error_per_bit); + + if (sad < best_sad) { + best_sad = sad; best_site = j; } } @@ -1723,12 +1659,11 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, } else { ref_mv->row += neighbors[best_site].row; ref_mv->col += neighbors[best_site].col; - best_address += (neighbors[best_site].row) * in_what_stride + - neighbors[best_site].col; + best_address = get_buf_from_mv(in_what, ref_mv); } } - return bestsad; + return best_sad; } // This function is called when we do joint motion search in comp_inter_inter @@ -1740,48 +1675,36 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, int *mvjcost, int *mvcost[2], const MV *center_mv, const uint8_t *second_pred, int w, int h) { - const MACROBLOCKD *const xd = &x->e_mbd; const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, {-1, -1}, {1, -1}, {-1, 1}, {1, 1}}; - int i, j; - - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *in_what = xd->plane[0].pre[0].buf; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride + - ref_mv->col]; - unsigned int thissad; - MV this_mv; + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV 
fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - - /* Get compound pred by averaging two pred blocks. */ - unsigned int bestsad = fn_ptr->sdaf(what, what_stride, - best_address, in_what_stride, - second_pred, 0x7fffffff) + + unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, + second_pred, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + int i, j; for (i = 0; i < search_range; ++i) { int best_site = -1; - for (j = 0; j < 8; j++) { - this_mv.row = ref_mv->row + neighbors[j].row; - this_mv.col = ref_mv->col + neighbors[j].col; - - if (is_mv_in(x, &this_mv)) { - const uint8_t *check_here = &in_what[this_mv.row * in_what_stride + - this_mv.col]; + for (j = 0; j < 8; ++j) { + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; - thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride, - second_pred, bestsad); - if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + if (is_mv_in(x, &mv)) { + unsigned int sad = fn_ptr->sdaf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, + second_pred, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); - if (thissad < bestsad) { - bestsad = thissad; + if (sad < best_sad) { + best_sad = sad; best_site = j; } } @@ -1795,5 +1718,5 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, ref_mv->col += neighbors[best_site].col; } } - return bestsad; + return best_sad; } diff --git a/libvpx/vp9/encoder/vp9_mcomp.h b/libvpx/vp9/encoder/vp9_mcomp.h index 917de75..f7b7c5e 100644 --- a/libvpx/vp9/encoder/vp9_mcomp.h +++ b/libvpx/vp9/encoder/vp9_mcomp.h @@ -42,8 +42,7 @@ int vp9_get_mvpred_var(const MACROBLOCK *x, const vp9_variance_fn_ptr_t *vfp, int use_mvcost); int 
vp9_get_mvpred_av_var(const MACROBLOCK *x, - MV *best_mv, - const MV *center_mv, + const MV *best_mv, const MV *center_mv, const uint8_t *second_pred, const vp9_variance_fn_ptr_t *vfp, int use_mvcost); diff --git a/libvpx/vp9/encoder/vp9_onyx_if.c b/libvpx/vp9/encoder/vp9_onyx_if.c index cccc1a9..3619ec8 100644 --- a/libvpx/vp9/encoder/vp9_onyx_if.c +++ b/libvpx/vp9/encoder/vp9_onyx_if.c @@ -27,8 +27,10 @@ #include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_tile_common.h" +#include "vp9/encoder/vp9_aq_complexity.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_bitstream.h" -#include "vp9/encoder/vp9_craq.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_firstpass.h" @@ -38,17 +40,14 @@ #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_segmentation.h" +#include "vp9/encoder/vp9_speed_features.h" +#if CONFIG_INTERNAL_STATS +#include "vp9/encoder/vp9_ssim.h" +#endif #include "vp9/encoder/vp9_temporal_filter.h" -#include "vp9/encoder/vp9_vaq.h" #include "vp9/encoder/vp9_resize.h" #include "vp9/encoder/vp9_svc_layercontext.h" -#define ALL_INTRA_MODES 0x3FF -#define INTRA_DC_ONLY 0x01 -#define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED)) -#define INTRA_DC_H_V ((1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED)) -#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED)) - void vp9_coef_tree_initialize(); #define DEFAULT_INTERP_FILTER SWITCHABLE @@ -62,30 +61,11 @@ void vp9_coef_tree_initialize(); // now so that HIGH_PRECISION is always // chosen. 
-// Masks for partially or completely disabling split mode -#define DISABLE_ALL_SPLIT 0x3F -#define DISABLE_ALL_INTER_SPLIT 0x1F -#define DISABLE_COMPOUND_SPLIT 0x18 -#define LAST_AND_INTRA_SPLIT_ONLY 0x1E - // Max rate target for 1080P and below encodes under normal circumstances // (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB #define MAX_MB_RATE 250 #define MAXRATE_1080P 2025000 -#if CONFIG_INTERNAL_STATS -extern double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, int lumamask, - double *weight); - - -extern double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, double *ssim_y, - double *ssim_u, double *ssim_v); - - -#endif - // #define OUTPUT_YUV_REC #ifdef OUTPUT_YUV_SRC @@ -103,9 +83,6 @@ FILE *keyfile; void vp9_init_quantizer(VP9_COMP *cpi); -static const double in_frame_q_adj_ratio[MAX_SEGMENTS] = - {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; - static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) { switch (mode) { case NORMAL: @@ -144,17 +121,33 @@ static void set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) { } } +static void setup_key_frame(VP9_COMP *cpi) { + vp9_setup_past_independence(&cpi->common); + + // All buffers are implicitly updated on key frames. 
+ cpi->refresh_golden_frame = 1; + cpi->refresh_alt_ref_frame = 1; +} + +static void setup_inter_frame(VP9_COMMON *cm) { + if (cm->error_resilient_mode || cm->intra_only) + vp9_setup_past_independence(cm); + + assert(cm->frame_context_idx < FRAME_CONTEXTS); + cm->fc = cm->frame_contexts[cm->frame_context_idx]; +} + void vp9_initialize_enc() { static int init_done = 0; if (!init_done) { - vp9_initialize_common(); + vp9_init_neighbors(); + vp9_init_quant_tables(); + vp9_coef_tree_initialize(); vp9_tokenize_initialize(); - vp9_init_quant_tables(); vp9_init_me_luts(); vp9_rc_init_minq_luts(); - // init_base_skip_probs(); vp9_entropy_mv_init(); vp9_entropy_mode_init(); init_done = 1; @@ -163,6 +156,7 @@ void vp9_initialize_enc() { static void dealloc_compressor_data(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; + int i; // Delete sementation map vpx_free(cpi->segmentation_map); @@ -173,16 +167,19 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { cpi->coding_context.last_frame_seg_map_copy = NULL; vpx_free(cpi->complexity_map); - cpi->complexity_map = 0; - vpx_free(cpi->cyclic_refresh.map); - cpi->cyclic_refresh.map = 0; + cpi->complexity_map = NULL; + + vp9_cyclic_refresh_free(cpi->cyclic_refresh); + cpi->cyclic_refresh = NULL; + vpx_free(cpi->active_map); - cpi->active_map = 0; + cpi->active_map = NULL; vp9_free_frame_buffers(cm); vp9_free_frame_buffer(&cpi->last_frame_uf); vp9_free_frame_buffer(&cpi->scaled_source); + vp9_free_frame_buffer(&cpi->scaled_last_source); vp9_free_frame_buffer(&cpi->alt_ref_buffer); vp9_lookahead_destroy(cpi->lookahead); @@ -195,103 +192,59 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { vpx_free(cpi->mb_norm_activity_map); cpi->mb_norm_activity_map = 0; - vpx_free(cpi->above_context[0]); - cpi->above_context[0] = NULL; - - vpx_free(cpi->above_seg_context); - cpi->above_seg_context = NULL; -} - -// Computes a q delta (in "q index" terms) to get from a starting q value -// to a target q value -int 
vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget) { - const RATE_CONTROL *const rc = &cpi->rc; - int start_index = rc->worst_quality; - int target_index = rc->worst_quality; - int i; - - // Convert the average q value to an index. - for (i = rc->best_quality; i < rc->worst_quality; ++i) { - start_index = i; - if (vp9_convert_qindex_to_q(i) >= qstart) - break; + for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { + LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i]; + vpx_free(lc->rc_twopass_stats_in.buf); + lc->rc_twopass_stats_in.buf = NULL; + lc->rc_twopass_stats_in.sz = 0; } - - // Convert the q target to an index - for (i = rc->best_quality; i < rc->worst_quality; ++i) { - target_index = i; - if (vp9_convert_qindex_to_q(i) >= qtarget) - break; - } - - return target_index - start_index; } -// Computes a q delta (in "q index" terms) to get from a starting q value -// to a value that should equate to the given rate ratio. -int vp9_compute_qdelta_by_rate(VP9_COMP *cpi, int base_q_index, - double rate_target_ratio) { - int i; - int target_index = cpi->rc.worst_quality; +static void save_coding_context(VP9_COMP *cpi) { + CODING_CONTEXT *const cc = &cpi->coding_context; + VP9_COMMON *cm = &cpi->common; - // Look up the current projected bits per block for the base index - const int base_bits_per_mb = vp9_rc_bits_per_mb(cpi->common.frame_type, - base_q_index, 1.0); + // Stores a snapshot of key state variables which can subsequently be + // restored with a call to vp9_restore_coding_context. These functions are + // intended for use in a re-code loop in vp9_compress_frame where the + // quantizer value is adjusted between loop iterations. + vp9_copy(cc->nmvjointcost, cpi->mb.nmvjointcost); + vp9_copy(cc->nmvcosts, cpi->mb.nmvcosts); + vp9_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp); - // Find the target bits per mb based on the base value and given ratio. 
- const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb); + vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs); - // Convert the q target to an index - for (i = cpi->rc.best_quality; i < cpi->rc.worst_quality; ++i) { - target_index = i; - if (vp9_rc_bits_per_mb(cpi->common.frame_type, i, 1.0) <= - target_bits_per_mb ) - break; - } + vpx_memcpy(cpi->coding_context.last_frame_seg_map_copy, + cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols)); - return target_index - base_q_index; -} - -// This function sets up a set of segments with delta Q values around -// the baseline frame quantizer. -static void setup_in_frame_q_adj(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - struct segmentation *const seg = &cm->seg; - - // Make SURE use of floating point in this function is safe. - vp9_clear_system_state(); + vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas); + vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas); - if (cm->frame_type == KEY_FRAME || - cpi->refresh_alt_ref_frame || - (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { - int segment; + cc->fc = cm->fc; +} - // Clear down the segment map - vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); +static void restore_coding_context(VP9_COMP *cpi) { + CODING_CONTEXT *const cc = &cpi->coding_context; + VP9_COMMON *cm = &cpi->common; - // Clear down the complexity map used for rd - vpx_memset(cpi->complexity_map, 0, cm->mi_rows * cm->mi_cols); + // Restore key state variables to the snapshot state stored in the + // previous call to vp9_save_coding_context. 
+ vp9_copy(cpi->mb.nmvjointcost, cc->nmvjointcost); + vp9_copy(cpi->mb.nmvcosts, cc->nmvcosts); + vp9_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp); - vp9_enable_segmentation(seg); - vp9_clearall_segfeatures(seg); + vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs); - // Select delta coding method - seg->abs_delta = SEGMENT_DELTADATA; + vpx_memcpy(cm->last_frame_seg_map, + cpi->coding_context.last_frame_seg_map_copy, + (cm->mi_rows * cm->mi_cols)); - // Segment 0 "Q" feature is disabled so it defaults to the baseline Q - vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q); + vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas); + vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas); - // Use some of the segments for in frame Q adjustment - for (segment = 1; segment < 2; segment++) { - const int qindex_delta = - vp9_compute_qdelta_by_rate(cpi, - cm->base_qindex, - in_frame_q_adj_ratio[segment]); - vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); - vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); - } - } + cm->fc = cc->fc; } + static void configure_static_seg_features(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; @@ -335,7 +288,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) { seg->update_map = 1; seg->update_data = 1; - qi_delta = vp9_compute_qdelta(cpi, rc->avg_q, rc->avg_q * 0.875); + qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875); vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2); vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2); @@ -356,7 +309,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) { seg->update_data = 1; seg->abs_delta = SEGMENT_DELTADATA; - qi_delta = vp9_compute_qdelta(cpi, rc->avg_q, rc->avg_q * 1.125); + qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125); vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2); vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); @@ -446,7 +399,7 @@ static void update_reference_segmentation_map(VP9_COMP 
*cpi) { uint8_t *cache = cache_ptr; for (col = 0; col < cm->mi_cols; col++, mi_8x8++, cache++) cache[0] = mi_8x8[0]->mbmi.segment_id; - mi_8x8_ptr += cm->mode_info_stride; + mi_8x8_ptr += cm->mi_stride; cache_ptr += cm->mi_cols; } } @@ -455,557 +408,137 @@ static int is_slowest_mode(int mode) { } static void set_rd_speed_thresholds(VP9_COMP *cpi) { - SPEED_FEATURES *sf = &cpi->sf; int i; // Set baseline threshold values for (i = 0; i < MAX_MODES; ++i) - sf->thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0; - - sf->thresh_mult[THR_NEARESTMV] = 0; - sf->thresh_mult[THR_NEARESTG] = 0; - sf->thresh_mult[THR_NEARESTA] = 0; - - sf->thresh_mult[THR_DC] += 1000; - - sf->thresh_mult[THR_NEWMV] += 1000; - sf->thresh_mult[THR_NEWA] += 1000; - sf->thresh_mult[THR_NEWG] += 1000; - - sf->thresh_mult[THR_NEARMV] += 1000; - sf->thresh_mult[THR_NEARA] += 1000; - sf->thresh_mult[THR_COMP_NEARESTLA] += 1000; - sf->thresh_mult[THR_COMP_NEARESTGA] += 1000; - - sf->thresh_mult[THR_TM] += 1000; - - sf->thresh_mult[THR_COMP_NEARLA] += 1500; - sf->thresh_mult[THR_COMP_NEWLA] += 2000; - sf->thresh_mult[THR_NEARG] += 1000; - sf->thresh_mult[THR_COMP_NEARGA] += 1500; - sf->thresh_mult[THR_COMP_NEWGA] += 2000; - - sf->thresh_mult[THR_ZEROMV] += 2000; - sf->thresh_mult[THR_ZEROG] += 2000; - sf->thresh_mult[THR_ZEROA] += 2000; - sf->thresh_mult[THR_COMP_ZEROLA] += 2500; - sf->thresh_mult[THR_COMP_ZEROGA] += 2500; - - sf->thresh_mult[THR_H_PRED] += 2000; - sf->thresh_mult[THR_V_PRED] += 2000; - sf->thresh_mult[THR_D45_PRED ] += 2500; - sf->thresh_mult[THR_D135_PRED] += 2500; - sf->thresh_mult[THR_D117_PRED] += 2500; - sf->thresh_mult[THR_D153_PRED] += 2500; - sf->thresh_mult[THR_D207_PRED] += 2500; - sf->thresh_mult[THR_D63_PRED] += 2500; + cpi->rd_thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? 
-500 : 0; + + cpi->rd_thresh_mult[THR_NEARESTMV] = 0; + cpi->rd_thresh_mult[THR_NEARESTG] = 0; + cpi->rd_thresh_mult[THR_NEARESTA] = 0; + + cpi->rd_thresh_mult[THR_DC] += 1000; + + cpi->rd_thresh_mult[THR_NEWMV] += 1000; + cpi->rd_thresh_mult[THR_NEWA] += 1000; + cpi->rd_thresh_mult[THR_NEWG] += 1000; + + cpi->rd_thresh_mult[THR_NEARMV] += 1000; + cpi->rd_thresh_mult[THR_NEARA] += 1000; + cpi->rd_thresh_mult[THR_COMP_NEARESTLA] += 1000; + cpi->rd_thresh_mult[THR_COMP_NEARESTGA] += 1000; + + cpi->rd_thresh_mult[THR_TM] += 1000; + + cpi->rd_thresh_mult[THR_COMP_NEARLA] += 1500; + cpi->rd_thresh_mult[THR_COMP_NEWLA] += 2000; + cpi->rd_thresh_mult[THR_NEARG] += 1000; + cpi->rd_thresh_mult[THR_COMP_NEARGA] += 1500; + cpi->rd_thresh_mult[THR_COMP_NEWGA] += 2000; + + cpi->rd_thresh_mult[THR_ZEROMV] += 2000; + cpi->rd_thresh_mult[THR_ZEROG] += 2000; + cpi->rd_thresh_mult[THR_ZEROA] += 2000; + cpi->rd_thresh_mult[THR_COMP_ZEROLA] += 2500; + cpi->rd_thresh_mult[THR_COMP_ZEROGA] += 2500; + + cpi->rd_thresh_mult[THR_H_PRED] += 2000; + cpi->rd_thresh_mult[THR_V_PRED] += 2000; + cpi->rd_thresh_mult[THR_D45_PRED ] += 2500; + cpi->rd_thresh_mult[THR_D135_PRED] += 2500; + cpi->rd_thresh_mult[THR_D117_PRED] += 2500; + cpi->rd_thresh_mult[THR_D153_PRED] += 2500; + cpi->rd_thresh_mult[THR_D207_PRED] += 2500; + cpi->rd_thresh_mult[THR_D63_PRED] += 2500; /* disable frame modes if flags not set */ if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) { - sf->thresh_mult[THR_NEWMV ] = INT_MAX; - sf->thresh_mult[THR_NEARESTMV] = INT_MAX; - sf->thresh_mult[THR_ZEROMV ] = INT_MAX; - sf->thresh_mult[THR_NEARMV ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEWMV ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARESTMV] = INT_MAX; + cpi->rd_thresh_mult[THR_ZEROMV ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARMV ] = INT_MAX; } if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) { - sf->thresh_mult[THR_NEARESTG ] = INT_MAX; - sf->thresh_mult[THR_ZEROG ] = INT_MAX; - sf->thresh_mult[THR_NEARG ] = INT_MAX; - 
sf->thresh_mult[THR_NEWG ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARESTG ] = INT_MAX; + cpi->rd_thresh_mult[THR_ZEROG ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARG ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEWG ] = INT_MAX; } if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) { - sf->thresh_mult[THR_NEARESTA ] = INT_MAX; - sf->thresh_mult[THR_ZEROA ] = INT_MAX; - sf->thresh_mult[THR_NEARA ] = INT_MAX; - sf->thresh_mult[THR_NEWA ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARESTA ] = INT_MAX; + cpi->rd_thresh_mult[THR_ZEROA ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARA ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEWA ] = INT_MAX; } if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != (VP9_LAST_FLAG | VP9_ALT_FLAG)) { - sf->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARLA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEWLA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_ZEROLA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARESTLA] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARLA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEWLA ] = INT_MAX; } if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != (VP9_GOLD_FLAG | VP9_ALT_FLAG)) { - sf->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARGA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEWGA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_ZEROGA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARESTGA] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARGA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEWGA ] = INT_MAX; } } static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) { - SPEED_FEATURES *sf = &cpi->sf; + const SPEED_FEATURES *const sf = &cpi->sf; int i; for (i = 0; i < MAX_REFS; ++i) - sf->thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0; + cpi->rd_thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode) ? 
-500 : 0; - sf->thresh_mult_sub8x8[THR_LAST] += 2500; - sf->thresh_mult_sub8x8[THR_GOLD] += 2500; - sf->thresh_mult_sub8x8[THR_ALTR] += 2500; - sf->thresh_mult_sub8x8[THR_INTRA] += 2500; - sf->thresh_mult_sub8x8[THR_COMP_LA] += 4500; - sf->thresh_mult_sub8x8[THR_COMP_GA] += 4500; + cpi->rd_thresh_mult_sub8x8[THR_LAST] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_GOLD] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_ALTR] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_INTRA] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] += 4500; + cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] += 4500; // Check for masked out split cases. - for (i = 0; i < MAX_REFS; i++) { + for (i = 0; i < MAX_REFS; i++) if (sf->disable_split_mask & (1 << i)) - sf->thresh_mult_sub8x8[i] = INT_MAX; - } + cpi->rd_thresh_mult_sub8x8[i] = INT_MAX; // disable mode test if frame flag is not set if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) - sf->thresh_mult_sub8x8[THR_LAST] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_LAST] = INT_MAX; if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) - sf->thresh_mult_sub8x8[THR_GOLD] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_GOLD] = INT_MAX; if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) - sf->thresh_mult_sub8x8[THR_ALTR] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_ALTR] = INT_MAX; if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != (VP9_LAST_FLAG | VP9_ALT_FLAG)) - sf->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX; if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != (VP9_GOLD_FLAG | VP9_ALT_FLAG)) - sf->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; } -static void set_good_speed_feature(VP9_COMMON *cm, - SPEED_FEATURES *sf, - int speed) { - int i; - sf->adaptive_rd_thresh = 1; - sf->recode_loop = ((speed < 1) ? 
ALLOW_RECODE : ALLOW_RECODE_KFMAXBW); - if (speed == 1) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_intra_only(cm) - ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 1; - sf->auto_mv_step_size = 1; - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - } - if (speed == 2) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_intra_only(cm) - ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? 
- DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_INTRA_LOWVAR; - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->reference_masking = 1; - sf->auto_mv_step_size = 1; - - sf->disable_filter_search_var_thresh = 50; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->use_lp32x32fdct = 1; - sf->mode_skip_start = 11; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - } - if (speed == 3) { - sf->use_square_partition_only = 1; - sf->tx_size_search_method = USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = DISABLE_ALL_SPLIT; - else - sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_INTRA_LOWVAR; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->reference_masking = 1; - sf->auto_mv_step_size = 1; - - sf->disable_split_var_thresh = 32; - sf->disable_filter_search_var_thresh = 100; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - 
sf->use_uv_intra_rd_estimate = 1; - sf->skip_encode_sb = 1; - sf->use_lp32x32fdct = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - sf->use_fast_coef_costing = 1; - - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } - if (speed == 4) { - sf->use_square_partition_only = 1; - sf->tx_size_search_method = USE_LARGESTALL; - sf->disable_split_mask = DISABLE_ALL_SPLIT; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_COMP_REFMISMATCH | - FLAG_SKIP_INTRA_LOWVAR | - FLAG_EARLY_TERMINATE; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->reference_masking = 1; - sf->auto_mv_step_size = 1; - - sf->disable_split_var_thresh = 64; - sf->disable_filter_search_var_thresh = 200; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->use_uv_intra_rd_estimate = 1; - sf->skip_encode_sb = 1; - sf->use_lp32x32fdct = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - sf->use_fast_coef_costing = 1; - - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } - if (speed >= 5) { - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->partition_search_type = FIXED_PARTITION; - sf->tx_size_search_method = frame_is_intra_only(cm) ? 
- USE_FULL_RD : USE_LARGESTALL; - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_COMP_REFMISMATCH | - FLAG_SKIP_INTRA_LOWVAR | - FLAG_EARLY_TERMINATE; - sf->use_rd_breakout = 1; - sf->use_lp32x32fdct = 1; - sf->optimize_coefficients = 0; - sf->auto_mv_step_size = 1; - sf->reference_masking = 1; - - sf->disable_split_mask = DISABLE_ALL_SPLIT; - sf->search_method = HEX; - sf->subpel_iters_per_step = 1; - sf->disable_split_var_thresh = 64; - sf->disable_filter_search_var_thresh = 500; - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = INTRA_DC_ONLY; - sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; - } - sf->use_fast_coef_updates = 2; - sf->use_fast_coef_costing = 1; - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } -} - -static void set_rt_speed_feature(VP9_COMMON *cm, - SPEED_FEATURES *sf, - int speed) { - sf->static_segmentation = 0; - sf->adaptive_rd_thresh = 1; - sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW); - sf->encode_breakout_thresh = 1; - sf->use_fast_coef_costing = 1; - - if (speed == 1) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = - frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? 
- DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 1; - sf->auto_mv_step_size = 1; - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->encode_breakout_thresh = 8; - } - if (speed >= 2) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = - frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH - | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA - | FLAG_SKIP_INTRA_LOWVAR; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->auto_mv_step_size = 1; - sf->reference_masking = 1; - - sf->disable_filter_search_var_thresh = 50; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->use_lp32x32fdct = 1; - sf->mode_skip_start = 11; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->encode_breakout_thresh = 200; - } - if (speed >= 3) { - sf->use_square_partition_only = 1; - sf->tx_size_search_method = USE_LARGESTALL; 
- - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = DISABLE_ALL_SPLIT; - else - sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH - | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA - | FLAG_SKIP_INTRA_LOWVAR; - - sf->disable_filter_search_var_thresh = 100; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->use_uv_intra_rd_estimate = 1; - sf->skip_encode_sb = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - sf->encode_breakout_thresh = 400; - } - if (speed >= 4) { - sf->optimize_coefficients = 0; - sf->disable_split_mask = DISABLE_ALL_SPLIT; - sf->use_fast_lpf_pick = 2; - sf->encode_breakout_thresh = 700; - } - if (speed >= 5) { - int i; - sf->last_partitioning_redo_frequency = 4; - sf->adaptive_rd_thresh = 5; - sf->use_fast_coef_costing = 0; - sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX; - sf->adjust_partitioning_from_last_frame = - cm->last_frame_type != cm->frame_type || (0 == - (cm->current_video_frame + 1) % sf->last_partitioning_redo_frequency); - sf->subpel_force_stop = 1; - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; - } - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY; - sf->frame_parameter_update = 0; - sf->encode_breakout_thresh = 1000; - sf->search_method = FAST_HEX; - sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV); - sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV)); - sf->disable_inter_mode_mask[BLOCK_64X32] = ~(1 << INTER_OFFSET(NEARESTMV)); - sf->disable_inter_mode_mask[BLOCK_64X64] = ~(1 << INTER_OFFSET(NEARESTMV)); - sf->max_intra_bsize = BLOCK_32X32; - } - if (speed >= 6) { - sf->partition_check = - (cm->current_video_frame % sf->last_partitioning_redo_frequency == 1); - sf->partition_search_type = REFERENCE_PARTITION; - 
sf->use_nonrd_pick_mode = 1; - sf->search_method = FAST_DIAMOND; - } - if (speed >= 7) { - sf->partition_search_type = VAR_BASED_FIXED_PARTITION; - sf->use_nonrd_pick_mode = 1; - sf->search_method = FAST_DIAMOND; - } - if (speed >= 8) { - int i; - for (i = 0; i < BLOCK_SIZES; ++i) - sf->disable_inter_mode_mask[i] = 14; // only search NEARESTMV (0) - } -} - -void vp9_set_speed_features(VP9_COMP *cpi) { - SPEED_FEATURES *sf = &cpi->sf; - VP9_COMMON *cm = &cpi->common; - int speed = cpi->speed; - int i; - - // Convert negative speed to positive - if (speed < 0) - speed = -speed; - +static void set_speed_features(VP9_COMP *cpi) { #if CONFIG_INTERNAL_STATS + int i; for (i = 0; i < MAX_MODES; ++i) cpi->mode_chosen_counts[i] = 0; #endif - // best quality defaults - sf->frame_parameter_update = 1; - sf->search_method = NSTEP; - sf->recode_loop = ALLOW_RECODE; - sf->subpel_search_method = SUBPEL_TREE; - sf->subpel_iters_per_step = 2; - sf->subpel_force_stop = 0; - sf->optimize_coefficients = !cpi->oxcf.lossless; - sf->reduce_first_step_size = 0; - sf->auto_mv_step_size = 0; - sf->max_step_search_steps = MAX_MVSEARCH_STEPS; - sf->comp_inter_joint_search_thresh = BLOCK_4X4; - sf->adaptive_rd_thresh = 0; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF; - sf->tx_size_search_method = USE_FULL_RD; - sf->use_lp32x32fdct = 0; - sf->adaptive_motion_search = 0; - sf->adaptive_pred_interp_filter = 0; - sf->reference_masking = 0; - sf->partition_search_type = SEARCH_PARTITION; - sf->less_rectangular_check = 0; - sf->use_square_partition_only = 0; - sf->auto_min_max_partition_size = NOT_IN_USE; - sf->max_partition_size = BLOCK_64X64; - sf->min_partition_size = BLOCK_4X4; - sf->adjust_partitioning_from_last_frame = 0; - sf->last_partitioning_redo_frequency = 4; - sf->disable_split_mask = 0; - sf->mode_search_skip_flags = 0; - sf->disable_split_var_thresh = 0; - sf->disable_filter_search_var_thresh = 0; - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = 
ALL_INTRA_MODES; - sf->intra_uv_mode_mask[i] = ALL_INTRA_MODES; - } - sf->use_rd_breakout = 0; - sf->skip_encode_sb = 0; - sf->use_uv_intra_rd_estimate = 0; - sf->use_fast_lpf_pick = 0; - sf->use_fast_coef_updates = 0; - sf->use_fast_coef_costing = 0; - sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set - sf->use_nonrd_pick_mode = 0; - sf->encode_breakout_thresh = 0; - for (i = 0; i < BLOCK_SIZES; ++i) - sf->disable_inter_mode_mask[i] = 0; - sf->max_intra_bsize = BLOCK_64X64; - // This setting only takes effect when partition_search_type is set - // to FIXED_PARTITION. - sf->always_this_block_size = BLOCK_16X16; - - switch (cpi->oxcf.mode) { - case MODE_BESTQUALITY: - case MODE_SECONDPASS_BEST: // This is the best quality mode. - cpi->diamond_search_sad = vp9_full_range_search; - break; - case MODE_FIRSTPASS: - case MODE_GOODQUALITY: - case MODE_SECONDPASS: - set_good_speed_feature(cm, sf, speed); - break; - case MODE_REALTIME: - set_rt_speed_feature(cm, sf, speed); - break; - }; /* switch */ + vp9_set_speed_features(cpi); // Set rd thresholds based on mode and speed setting set_rd_speed_thresholds(cpi); set_rd_speed_thresholds_sub8x8(cpi); - // Slow quant, dct and trellis not worthwhile for first pass - // so make sure they are always turned off. - if (cpi->pass == 1) { - sf->optimize_coefficients = 0; - } - - // No recode for 1 pass. 
- if (cpi->pass == 0) { - sf->recode_loop = DISALLOW_RECODE; - sf->optimize_coefficients = 0; - } - cpi->mb.fwd_txm4x4 = vp9_fdct4x4; if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) { cpi->mb.fwd_txm4x4 = vp9_fwht4x4; } - - if (cpi->sf.subpel_search_method == SUBPEL_TREE) { - cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree; - cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_tree; - } - - cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1; - - if (cpi->encode_breakout && cpi->oxcf.mode == MODE_REALTIME && - sf->encode_breakout_thresh > cpi->encode_breakout) - cpi->encode_breakout = sf->encode_breakout_thresh; - - if (sf->disable_split_mask == DISABLE_ALL_SPLIT) - sf->adaptive_pred_interp_filter = 0; } static void alloc_raw_frame_buffers(VP9_COMP *cpi) { @@ -1048,6 +581,13 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate scaled source buffer"); + if (vp9_alloc_frame_buffer(&cpi->scaled_last_source, + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9_ENC_BORDER_IN_PIXELS)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate scaled last source buffer"); + vpx_free(cpi->tok); { @@ -1065,24 +605,12 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { CHECK_MEM_ERROR(cm, cpi->mb_norm_activity_map, vpx_calloc(sizeof(unsigned int), cm->mb_rows * cm->mb_cols)); - - // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm - // block where mi unit size is 8x8. 
- vpx_free(cpi->above_context[0]); - CHECK_MEM_ERROR(cm, cpi->above_context[0], - vpx_calloc(2 * mi_cols_aligned_to_sb(cm->mi_cols) * - MAX_MB_PLANE, - sizeof(*cpi->above_context[0]))); - - vpx_free(cpi->above_seg_context); - CHECK_MEM_ERROR(cm, cpi->above_seg_context, - vpx_calloc(mi_cols_aligned_to_sb(cm->mi_cols), - sizeof(*cpi->above_seg_context))); } static void update_frame_size(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; vp9_update_frame_size(cm); @@ -1101,6 +629,13 @@ static void update_frame_size(VP9_COMP *cpi) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to reallocate scaled source buffer"); + if (vp9_realloc_frame_buffer(&cpi->scaled_last_source, + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to reallocate scaled last source buffer"); + { int y_stride = cpi->scaled_source.y_stride; @@ -1111,14 +646,7 @@ static void update_frame_size(VP9_COMP *cpi) { } } - { - int i; - for (i = 1; i < MAX_MB_PLANE; ++i) { - cpi->above_context[i] = cpi->above_context[0] + - i * sizeof(*cpi->above_context[0]) * 2 * - mi_cols_aligned_to_sb(cm->mi_cols); - } - } + init_macroblockd(cm, xd); } // Table that converts 0-63 Q range values passed in outside to the Qindex @@ -1153,10 +681,9 @@ void vp9_new_framerate(VP9_COMP *cpi, double framerate) { oxcf->framerate = framerate < 0.1 ? 
30 : framerate; cpi->output_framerate = cpi->oxcf.framerate; rc->av_per_frame_bandwidth = (int)(oxcf->target_bandwidth / - cpi->output_framerate); + cpi->output_framerate); rc->min_frame_bandwidth = (int)(rc->av_per_frame_bandwidth * - oxcf->two_pass_vbrmin_section / 100); - + oxcf->two_pass_vbrmin_section / 100); rc->min_frame_bandwidth = MAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS); @@ -1213,12 +740,12 @@ static void set_tile_limits(VP9_COMP *cpi) { static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { VP9_COMMON *const cm = &cpi->common; - RATE_CONTROL *const rc = &cpi->rc; int i; cpi->oxcf = *oxcf; - cm->version = oxcf->version; + cm->profile = oxcf->profile; + cm->bit_depth = oxcf->bit_depth; cm->width = oxcf->width; cm->height = oxcf->height; @@ -1231,43 +758,16 @@ static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { // Temporal scalability. cpi->svc.number_temporal_layers = oxcf->ts_number_layers; - if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + if ((cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) || + (cpi->svc.number_spatial_layers > 1 && + cpi->oxcf.mode == MODE_SECONDPASS_BEST)) { vp9_init_layer_context(cpi); } // change includes all joint functionality vp9_change_config(cpi, oxcf); - // Initialize active best and worst q and average q values. 
- if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - rc->avg_frame_qindex[0] = cpi->oxcf.worst_allowed_q; - rc->avg_frame_qindex[1] = cpi->oxcf.worst_allowed_q; - rc->avg_frame_qindex[2] = cpi->oxcf.worst_allowed_q; - } else { - rc->avg_frame_qindex[0] = (cpi->oxcf.worst_allowed_q + - cpi->oxcf.best_allowed_q) / 2; - rc->avg_frame_qindex[1] = (cpi->oxcf.worst_allowed_q + - cpi->oxcf.best_allowed_q) / 2; - rc->avg_frame_qindex[2] = (cpi->oxcf.worst_allowed_q + - cpi->oxcf.best_allowed_q) / 2; - } - rc->last_q[0] = cpi->oxcf.best_allowed_q; - rc->last_q[1] = cpi->oxcf.best_allowed_q; - rc->last_q[2] = cpi->oxcf.best_allowed_q; - - // Initialise the starting buffer levels - rc->buffer_level = cpi->oxcf.starting_buffer_level; - rc->bits_off_target = cpi->oxcf.starting_buffer_level; - - rc->rolling_target_bits = rc->av_per_frame_bandwidth; - rc->rolling_actual_bits = rc->av_per_frame_bandwidth; - rc->long_rolling_target_bits = rc->av_per_frame_bandwidth; - rc->long_rolling_actual_bits = rc->av_per_frame_bandwidth; - - rc->total_actual_bits = 0; - rc->total_target_vs_actual = 0; - cpi->static_mb_pct = 0; cpi->lst_fb_idx = 0; @@ -1281,15 +781,18 @@ static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { cpi->fixed_divide[i] = 0x80000 / i; } -void vp9_change_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { +void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) { VP9_COMMON *const cm = &cpi->common; + RATE_CONTROL *const rc = &cpi->rc; - if (!cpi || !oxcf) - return; + if (cm->profile != oxcf->profile) + cm->profile = oxcf->profile; + cm->bit_depth = oxcf->bit_depth; - if (cm->version != oxcf->version) { - cm->version = oxcf->version; - } + if (cm->profile <= PROFILE_1) + assert(cm->bit_depth == BITS_8); + else + assert(cm->bit_depth > BITS_8); cpi->oxcf = *oxcf; @@ -1325,15 +828,17 @@ void vp9_change_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { break; } - cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q]; - 
cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q]; - cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level]; - cpi->oxcf.lossless = oxcf->lossless; - cpi->mb.e_mbd.itxm_add = cpi->oxcf.lossless ? vp9_iwht4x4_add - : vp9_idct4x4_add; - cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL; - + if (cpi->oxcf.lossless) { + // In lossless mode, make sure right quantizer range and correct transform + // is set. + cpi->oxcf.worst_allowed_q = 0; + cpi->oxcf.best_allowed_q = 0; + cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add; + } else { + cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add; + } + rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; cpi->refresh_golden_frame = 0; @@ -1382,17 +887,15 @@ void vp9_change_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { cpi->oxcf.target_bandwidth, 1000); // Under a configuration change, where maximum_buffer_size may change, // keep buffer level clipped to the maximum allowed buffer size. - cpi->rc.bits_off_target = MIN(cpi->rc.bits_off_target, - cpi->oxcf.maximum_buffer_size); - cpi->rc.buffer_level = MIN(cpi->rc.buffer_level, - cpi->oxcf.maximum_buffer_size); + rc->bits_off_target = MIN(rc->bits_off_target, cpi->oxcf.maximum_buffer_size); + rc->buffer_level = MIN(rc->buffer_level, cpi->oxcf.maximum_buffer_size); // Set up frame rate and related parameters rate control values. 
vp9_new_framerate(cpi, cpi->oxcf.framerate); // Set absolute upper and lower quality limits - cpi->rc.worst_quality = cpi->oxcf.worst_allowed_q; - cpi->rc.best_quality = cpi->oxcf.best_allowed_q; + rc->worst_quality = cpi->oxcf.worst_allowed_q; + rc->best_quality = cpi->oxcf.best_allowed_q; // active values should only be modified if out of new range @@ -1417,8 +920,9 @@ void vp9_change_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { } update_frame_size(cpi); - if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + if ((cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) || + (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) { vp9_update_layer_context_change_config(cpi, (int)cpi->oxcf.target_bandwidth); } @@ -1434,7 +938,7 @@ void vp9_change_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { #else cpi->alt_ref_source = NULL; #endif - cpi->rc.is_src_frame_alt_ref = 0; + rc->is_src_frame_alt_ref = 0; #if 0 // Experimental RD Code @@ -1455,7 +959,7 @@ static void cal_nmvjointsadcost(int *mvjointsadcost) { mvjointsadcost[0] = 600; mvjointsadcost[1] = 300; mvjointsadcost[2] = 300; - mvjointsadcost[0] = 300; + mvjointsadcost[3] = 300; } static void cal_nmvsadcosts(int *mvsadcost[2]) { @@ -1611,7 +1115,6 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { int i, j; VP9_COMP *const cpi = vpx_memalign(32, sizeof(VP9_COMP)); VP9_COMMON *const cm = cpi != NULL ? &cpi->common : NULL; - RATE_CONTROL *const rc = cpi != NULL ? 
&cpi->rc : NULL; if (!cm) return NULL; @@ -1634,6 +1137,7 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->use_svc = 0; init_config(cpi, oxcf); + vp9_rc_init(&cpi->oxcf, cpi->pass, &cpi->rc); init_pick_mode_context(cpi); cm->current_video_frame = 0; @@ -1641,8 +1145,6 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { // Set reference frame sign bias for ALTREF frame to 1 (for now) cm->ref_frame_sign_bias[ALTREF_FRAME] = 1; - rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; - cpi->gold_is_last = 0; cpi->alt_is_last = 0; cpi->gold_is_alt = 0; @@ -1656,8 +1158,8 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); // Create a map used for cyclic background refresh. - CHECK_MEM_ERROR(cm, cpi->cyclic_refresh.map, - vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); + CHECK_MEM_ERROR(cm, cpi->cyclic_refresh, + vp9_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols)); // And a place holder structure is the coding context // for use if we want to save and restore it @@ -1678,13 +1180,6 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { /*Initialize the feed-forward activity masking.*/ cpi->activity_avg = 90 << 12; cpi->key_frame_frequency = cpi->oxcf.key_freq; - - rc->frames_since_key = 8; // Sensible default for first frame. 
- rc->this_key_frame_forced = 0; - rc->next_key_frame_forced = 0; - - rc->source_alt_ref_pending = 0; - rc->source_alt_ref_active = 0; cpi->refresh_alt_ref_frame = 0; #if CONFIG_MULTIPLE_ARF @@ -1740,18 +1235,6 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->first_time_stamp_ever = INT64_MAX; - rc->frames_till_gf_update_due = 0; - - rc->ni_av_qi = cpi->oxcf.worst_allowed_q; - rc->ni_tot_qi = 0; - rc->ni_frames = 0; - rc->tot_q = 0.0; - rc->avg_q = vp9_convert_qindex_to_q(cpi->oxcf.worst_allowed_q); - - rc->rate_correction_factor = 1.0; - rc->key_frame_rate_correction_factor = 1.0; - rc->gf_rate_correction_factor = 1.0; - cal_nmvjointsadcost(cpi->mb.nmvjointsadcost); cpi->mb.nmvcost[0] = &cpi->mb.nmvcosts[0][MV_MAX]; cpi->mb.nmvcost[1] = &cpi->mb.nmvcosts[1][MV_MAX]; @@ -1787,13 +1270,53 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { const size_t packet_sz = sizeof(FIRSTPASS_STATS); const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz); - cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; - cpi->twopass.stats_in = cpi->twopass.stats_in_start; - cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1]; - vp9_init_second_pass(cpi); + if (cpi->svc.number_spatial_layers > 1 + && cpi->svc.number_temporal_layers == 1) { + FIRSTPASS_STATS *const stats = oxcf->two_pass_stats_in.buf; + FIRSTPASS_STATS *stats_copy[VPX_SS_MAX_LAYERS] = {0}; + int i; + + for (i = 0; i < oxcf->ss_number_layers; ++i) { + FIRSTPASS_STATS *const last_packet_for_layer = + &stats[packets - oxcf->ss_number_layers + i]; + const int layer_id = (int)last_packet_for_layer->spatial_layer_id; + const int packets_in_layer = (int)last_packet_for_layer->count + 1; + if (layer_id >= 0 && layer_id < oxcf->ss_number_layers) { + LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id]; + + vpx_free(lc->rc_twopass_stats_in.buf); + + lc->rc_twopass_stats_in.sz = packets_in_layer * packet_sz; + CHECK_MEM_ERROR(cm, lc->rc_twopass_stats_in.buf, + 
vpx_malloc(lc->rc_twopass_stats_in.sz)); + lc->twopass.stats_in_start = lc->rc_twopass_stats_in.buf; + lc->twopass.stats_in = lc->twopass.stats_in_start; + lc->twopass.stats_in_end = lc->twopass.stats_in_start + + packets_in_layer - 1; + stats_copy[layer_id] = lc->rc_twopass_stats_in.buf; + } + } + + for (i = 0; i < packets; ++i) { + const int layer_id = (int)stats[i].spatial_layer_id; + if (layer_id >= 0 && layer_id < oxcf->ss_number_layers + && stats_copy[layer_id] != NULL) { + *stats_copy[layer_id] = stats[i]; + ++stats_copy[layer_id]; + } + } + + vp9_init_second_pass_spatial_svc(cpi); + } else { + cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; + cpi->twopass.stats_in = cpi->twopass.stats_in_start; + cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1]; + + vp9_init_second_pass(cpi); + } } - vp9_set_speed_features(cpi); + set_speed_features(cpi); // Default rd threshold factors for mode selection for (i = 0; i < BLOCK_SIZES; ++i) { @@ -2046,53 +1569,42 @@ void vp9_remove_compressor(VP9_COMP *cpi) { #endif } +static int64_t get_sse(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int width, int height) { + const int dw = width % 16; + const int dh = height % 16; + int64_t total_sse = 0; + unsigned int sse = 0; + int sum = 0; + int x, y; + + if (dw > 0) { + variance(&a[width - dw], a_stride, &b[width - dw], b_stride, + dw, height, &sse, &sum); + total_sse += sse; + } + if (dh > 0) { + variance(&a[(height - dh) * a_stride], a_stride, + &b[(height - dh) * b_stride], b_stride, + width - dw, dh, &sse, &sum); + total_sse += sse; + } -static uint64_t calc_plane_error(const uint8_t *orig, int orig_stride, - const uint8_t *recon, int recon_stride, - unsigned int cols, unsigned int rows) { - unsigned int row, col; - uint64_t total_sse = 0; - int diff; - - for (row = 0; row + 16 <= rows; row += 16) { - for (col = 0; col + 16 <= cols; col += 16) { - unsigned int sse; - - vp9_mse16x16(orig + col, orig_stride, recon + col, 
recon_stride, &sse); + for (y = 0; y < height / 16; ++y) { + const uint8_t *pa = a; + const uint8_t *pb = b; + for (x = 0; x < width / 16; ++x) { + vp9_mse16x16(pa, a_stride, pb, b_stride, &sse); total_sse += sse; - } - - /* Handle odd-sized width */ - if (col < cols) { - unsigned int border_row, border_col; - const uint8_t *border_orig = orig; - const uint8_t *border_recon = recon; - for (border_row = 0; border_row < 16; border_row++) { - for (border_col = col; border_col < cols; border_col++) { - diff = border_orig[border_col] - border_recon[border_col]; - total_sse += diff * diff; - } - - border_orig += orig_stride; - border_recon += recon_stride; - } + pa += 16; + pb += 16; } - orig += orig_stride * 16; - recon += recon_stride * 16; - } - - /* Handle odd-sized height */ - for (; row < rows; row++) { - for (col = 0; col < cols; col++) { - diff = orig[col] - recon[col]; - total_sse += diff * diff; - } - - orig += orig_stride; - recon += recon_stride; + a += 16 * a_stride; + b += 16 * b_stride; } return total_sse; @@ -2120,9 +1632,9 @@ static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, const int w = widths[i]; const int h = heights[i]; const uint32_t samples = w * h; - const uint64_t sse = calc_plane_error(a_planes[i], a_strides[i], - b_planes[i], b_strides[i], - w, h); + const uint64_t sse = get_sse(a_planes[i], a_strides[i], + b_planes[i], b_strides[i], + w, h); psnr->sse[1 + i] = sse; psnr->samples[1 + i] = samples; psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, 255.0, (double)sse); @@ -2521,7 +2033,7 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { vpx_usec_timer_start(&timer); - vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.use_fast_lpf_pick); + vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick); vpx_usec_timer_mark(&timer); cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); @@ -2596,7 +2108,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { vp9_clear_system_state(); - recon_err = 
vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); + recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); if (cpi->twopass.total_left_stats.coded_error != 0.0) fprintf(f, "%10u %10d %10d %10d %10d %10d " @@ -2652,7 +2164,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, int q) { VP9_COMMON *const cm = &cpi->common; vp9_clear_system_state(); - vp9_set_quantizer(cpi, q); + vp9_set_quantizer(cm, q); // Set up entropy context depending on frame type. The decoder mandates // the use of the default context, index 0, for keyframes and inter @@ -2660,21 +2172,21 @@ static void encode_without_recode_loop(VP9_COMP *cpi, // other inter-frames the encoder currently uses only two contexts; // context 1 for ALTREF frames and context 0 for the others. if (cm->frame_type == KEY_FRAME) { - vp9_setup_key_frame(cpi); + setup_key_frame(cpi); } else { - if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) { - cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame; - } - vp9_setup_inter_frame(cpi); + if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) + cm->frame_context_idx = cpi->refresh_alt_ref_frame; + + setup_inter_frame(cm); } // Variance adaptive and in frame q adjustment experiments are mutually // exclusive. if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_vaq_frame_setup(cpi); } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { - setup_in_frame_q_adj(cpi); + vp9_setup_in_frame_q_adj(cpi); } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - vp9_setup_cyclic_refresh_aq(cpi); + vp9_cyclic_refresh_setup(cpi); } // transform / motion compensation build reconstruction frame vp9_encode_frame(cpi); @@ -2709,7 +2221,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, do { vp9_clear_system_state(); - vp9_set_quantizer(cpi, q); + vp9_set_quantizer(cm, q); if (loop_count == 0) { // Set up entropy context depending on frame type. 
The decoder mandates @@ -2718,12 +2230,12 @@ static void encode_with_recode_loop(VP9_COMP *cpi, // other inter-frames the encoder currently uses only two contexts; // context 1 for ALTREF frames and context 0 for the others. if (cm->frame_type == KEY_FRAME) { - vp9_setup_key_frame(cpi); + setup_key_frame(cpi); } else { - if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) { + if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame; - } - vp9_setup_inter_frame(cpi); + + setup_inter_frame(cm); } } @@ -2732,7 +2244,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_vaq_frame_setup(cpi); } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { - setup_in_frame_q_adj(cpi); + vp9_setup_in_frame_q_adj(cpi); } // transform / motion compensation build reconstruction frame @@ -2748,13 +2260,13 @@ static void encode_with_recode_loop(VP9_COMP *cpi, // accurate estimate of output frame size to determine if we need // to recode. 
if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) { - vp9_save_coding_context(cpi); + save_coding_context(cpi); cpi->dummy_packing = 1; if (!cpi->sf.use_nonrd_pick_mode) vp9_pack_bitstream(cpi, dest, size); rc->projected_frame_size = (int)(*size) << 3; - vp9_restore_coding_context(cpi); + restore_coding_context(cpi); if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1; @@ -2767,7 +2279,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, rc->this_key_frame_forced && (rc->projected_frame_size < rc->max_frame_bandwidth)) { int last_q = q; - int kf_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); + int kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); int high_err_target = cpi->ambient_err; int low_err_target = cpi->ambient_err >> 1; @@ -2971,6 +2483,19 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } else { cpi->Source = cpi->un_scaled_source; } + + // Scale the last source buffer, if required. + if (cpi->unscaled_last_source != NULL) { + if (cm->mi_cols * MI_SIZE != cpi->unscaled_last_source->y_width || + cm->mi_rows * MI_SIZE != cpi->unscaled_last_source->y_height) { + scale_and_extend_frame_nonnormative(cpi->unscaled_last_source, + &cpi->scaled_last_source); + cpi->Last_Source = &cpi->scaled_last_source; + } else { + cpi->Last_Source = cpi->unscaled_last_source; + } + } + vp9_scale_references(cpi); vp9_clear_system_state(); @@ -3008,7 +2533,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Set various flags etc to special state if it is a key frame. if (frame_is_intra_only(cm)) { - vp9_setup_key_frame(cpi); + setup_key_frame(cpi); // Reset the loop filter deltas and segmentation map. vp9_reset_segment_features(&cm->seg); @@ -3090,6 +2615,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_write_yuv_frame(cpi->Source); #endif + set_speed_features(cpi); + // Decide q and q bounds. 
q = vp9_rc_pick_q_and_bounds(cpi, &bottom_index, &top_index); @@ -3099,8 +2626,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, set_high_precision_mv(cpi, q < HIGH_PRECISION_MV_QTHRESH); } - vp9_set_speed_features(cpi); - if (cpi->sf.recode_loop == DISALLOW_RECODE) { encode_without_recode_loop(cpi, size, dest, q); } else { @@ -3111,7 +2636,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // fixed interval. Note the reconstruction error if it is the frame before // the force key frame if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) { - cpi->ambient_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); + cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); } // If the encoder forced a KEY_FRAME decision @@ -3231,11 +2756,13 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Don't increment frame counters if this was an altref buffer // update not a real frame ++cm->current_video_frame; + if (cpi->use_svc) + vp9_inc_frame_in_layer(&cpi->svc); } // restore prev_mi - cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1; - cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1; + cm->prev_mi = cm->prev_mip + cm->mi_stride + 1; + cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1; } static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest, @@ -3261,7 +2788,7 @@ static void Pass1Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, (void) frame_flags; vp9_rc_get_first_pass_params(cpi); - vp9_set_quantizer(cpi, find_fp_qindex()); + vp9_set_quantizer(&cpi->common, find_fp_qindex()); vp9_first_pass(cpi); } @@ -3272,7 +2799,7 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size, vp9_rc_get_second_pass_params(cpi); encode_frame_to_data_rate(cpi, size, dest, frame_flags); - vp9_twopass_postencode_update(cpi, *size); + vp9_twopass_postencode_update(cpi); } static void check_initial_width(VP9_COMP *cpi, int subsampling_x, @@ -3306,7 +2833,7 @@ int 
vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags, vpx_usec_timer_mark(&timer); cpi->time_receive_data += vpx_usec_timer_elapsed(&timer); - if (cm->version == 0 && (subsampling_x != 1 || subsampling_y != 1)) { + if (cm->profile == PROFILE_0 && (subsampling_x != 1 || subsampling_y != 1)) { vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM, "Non-4:2:0 color space requires profile >= 1"); res = -1; @@ -3377,8 +2904,9 @@ void adjust_frame_rate(VP9_COMP *cpi) { int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, size_t *size, uint8_t *dest, int64_t *time_stamp, int64_t *time_end, int flush) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &cpi->mb.e_mbd; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + RATE_CONTROL *const rc = &cpi->rc; struct vpx_usec_timer cmptimer; YV12_BUFFER_CONFIG *force_src_buffer = NULL; MV_REFERENCE_FRAME ref_frame; @@ -3386,9 +2914,14 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (!cpi) return -1; + if (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2) { + vp9_restore_layer_context(cpi); + } + vpx_usec_timer_start(&cmptimer); cpi->source = NULL; + cpi->last_source = NULL; set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV); @@ -3400,7 +2933,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cpi->refresh_alt_ref_frame = 0; // Should we code an alternate reference frame. 
- if (cpi->oxcf.play_alternate && cpi->rc.source_alt_ref_pending) { + if (cpi->oxcf.play_alternate && rc->source_alt_ref_pending) { int frames_to_arf; #if CONFIG_MULTIPLE_ARF @@ -3412,9 +2945,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, - cpi->next_frame_in_order; else #endif - frames_to_arf = cpi->rc.frames_till_gf_update_due; + frames_to_arf = rc->frames_till_gf_update_due; - assert(frames_to_arf <= cpi->rc.frames_to_key); + assert(frames_to_arf <= rc->frames_to_key); if ((cpi->source = vp9_lookahead_peek(cpi->lookahead, frames_to_arf))) { #if CONFIG_MULTIPLE_ARF @@ -3426,7 +2959,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (cpi->oxcf.arnr_max_frames > 0) { // Produce the filtered ARF frame. // TODO(agrange) merge these two functions. - vp9_configure_arnr_filter(cpi, frames_to_arf, cpi->rc.gfu_boost); + vp9_configure_arnr_filter(cpi, frames_to_arf, rc->gfu_boost); vp9_temporal_filter_prepare(cpi, frames_to_arf); vp9_extend_frame_borders(&cpi->alt_ref_buffer); force_src_buffer = &cpi->alt_ref_buffer; @@ -3436,14 +2969,14 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cpi->refresh_alt_ref_frame = 1; cpi->refresh_golden_frame = 0; cpi->refresh_last_frame = 0; - cpi->rc.is_src_frame_alt_ref = 0; + rc->is_src_frame_alt_ref = 0; #if CONFIG_MULTIPLE_ARF if (!cpi->multi_arf_enabled) #endif - cpi->rc.source_alt_ref_pending = 0; + rc->source_alt_ref_pending = 0; } else { - cpi->rc.source_alt_ref_pending = 0; + rc->source_alt_ref_pending = 0; } } @@ -3451,25 +2984,32 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, #if CONFIG_MULTIPLE_ARF int i; #endif + + // Get last frame source. 
+ if (cm->current_video_frame > 0) { + if ((cpi->last_source = vp9_lookahead_peek(cpi->lookahead, -1)) == NULL) + return -1; + } + if ((cpi->source = vp9_lookahead_pop(cpi->lookahead, flush))) { cm->show_frame = 1; cm->intra_only = 0; #if CONFIG_MULTIPLE_ARF // Is this frame the ARF overlay. - cpi->rc.is_src_frame_alt_ref = 0; + rc->is_src_frame_alt_ref = 0; for (i = 0; i < cpi->arf_buffered; ++i) { if (cpi->source == cpi->alt_ref_source[i]) { - cpi->rc.is_src_frame_alt_ref = 1; + rc->is_src_frame_alt_ref = 1; cpi->refresh_golden_frame = 1; break; } } #else - cpi->rc.is_src_frame_alt_ref = cpi->alt_ref_source - && (cpi->source == cpi->alt_ref_source); + rc->is_src_frame_alt_ref = cpi->alt_ref_source && + (cpi->source == cpi->alt_ref_source); #endif - if (cpi->rc.is_src_frame_alt_ref) { + if (rc->is_src_frame_alt_ref) { // Current frame is an ARF overlay frame. #if CONFIG_MULTIPLE_ARF cpi->alt_ref_source[i] = NULL; @@ -3489,13 +3029,20 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (cpi->source) { cpi->un_scaled_source = cpi->Source = force_src_buffer ? 
force_src_buffer : &cpi->source->img; + + if (cpi->last_source != NULL) { + cpi->unscaled_last_source = &cpi->last_source->img; + } else { + cpi->unscaled_last_source = NULL; + } + *time_stamp = cpi->source->ts_start; *time_end = cpi->source->ts_end; *frame_flags = cpi->source->flags; #if CONFIG_MULTIPLE_ARF - if ((cm->frame_type != KEY_FRAME) && (cpi->pass == 2)) - cpi->rc.source_alt_ref_pending = is_next_frame_arf(cpi); + if (cm->frame_type != KEY_FRAME && cpi->pass == 2) + rc->source_alt_ref_pending = is_next_frame_arf(cpi); #endif } else { *size = 0; @@ -3518,7 +3065,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - vp9_update_layer_framerate(cpi); + vp9_update_temporal_layer_framerate(cpi); vp9_restore_layer_context(cpi); } @@ -3568,8 +3115,6 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME); - xd->interp_kernel = vp9_get_interp_kernel( - DEFAULT_INTERP_FILTER == SWITCHABLE ? EIGHTTAP : DEFAULT_INTERP_FILTER); if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_vaq_init(); @@ -3578,7 +3123,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (cpi->pass == 1 && (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) { Pass1Encode(cpi, size, dest, frame_flags); - } else if (cpi->pass == 2 && !cpi->use_svc) { + } else if (cpi->pass == 2 && + (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) { Pass2Encode(cpi, size, dest, frame_flags); } else if (cpi->use_svc) { SvcEncode(cpi, size, dest, frame_flags); @@ -3600,8 +3146,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } // Save layer specific state. 
- if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + if ((cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) || + (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) { vp9_save_layer_context(cpi); } @@ -3844,28 +3391,12 @@ void vp9_set_svc(VP9_COMP *cpi, int use_svc) { return; } -int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *reference) { - int i, j; - int total = 0; - - const uint8_t *src = source->y_buffer; - const uint8_t *ref = reference->y_buffer; - - // Loop through the Y plane raw and reconstruction data summing - // (square differences) - for (i = 0; i < source->y_height; i += 16) { - for (j = 0; j < source->y_width; j += 16) { - unsigned int sse; - total += vp9_mse16x16(src + j, source->y_stride, - ref + j, reference->y_stride, &sse); - } - - src += 16 * source->y_stride; - ref += 16 * reference->y_stride; - } +int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b) { + assert(a->y_crop_width == b->y_crop_width); + assert(a->y_crop_height == b->y_crop_height); - return total; + return (int)get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, + a->y_crop_width, a->y_crop_height); } diff --git a/libvpx/vp9/encoder/vp9_onyx_int.h b/libvpx/vp9/encoder/vp9_onyx_int.h index f4b44ce..e30fb02 100644 --- a/libvpx/vp9/encoder/vp9_onyx_int.h +++ b/libvpx/vp9/encoder/vp9_onyx_int.h @@ -23,6 +23,7 @@ #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_onyxc_int.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_lookahead.h" @@ -30,6 +31,7 @@ #include "vp9/encoder/vp9_mcomp.h" #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" +#include "vp9/encoder/vp9_speed_features.h" #include "vp9/encoder/vp9_svc_layercontext.h" #include "vp9/encoder/vp9_tokenize.h" #include 
"vp9/encoder/vp9_variance.h" @@ -114,75 +116,6 @@ typedef enum { } THR_MODES_SUB8X8; typedef enum { - DIAMOND = 0, - NSTEP = 1, - HEX = 2, - BIGDIA = 3, - SQUARE = 4, - FAST_HEX = 5, - FAST_DIAMOND = 6 -} SEARCH_METHODS; - -typedef enum { - USE_FULL_RD = 0, - USE_LARGESTINTRA, - USE_LARGESTINTRA_MODELINTER, - USE_LARGESTALL -} TX_SIZE_SEARCH_METHOD; - -typedef enum { - NOT_IN_USE = 0, - RELAXED_NEIGHBORING_MIN_MAX = 1, - STRICT_NEIGHBORING_MIN_MAX = 2 -} AUTO_MIN_MAX_MODE; - -typedef enum { - // Terminate search early based on distortion so far compared to - // qp step, distortion in the neighborhood of the frame, etc. - FLAG_EARLY_TERMINATE = 1 << 0, - - // Skips comp inter modes if the best so far is an intra mode. - FLAG_SKIP_COMP_BESTINTRA = 1 << 1, - - // Skips comp inter modes if the best single intermode so far does - // not have the same reference as one of the two references being - // tested. - FLAG_SKIP_COMP_REFMISMATCH = 1 << 2, - - // Skips oblique intra modes if the best so far is an inter mode. - FLAG_SKIP_INTRA_BESTINTER = 1 << 3, - - // Skips oblique intra modes at angles 27, 63, 117, 153 if the best - // intra so far is not one of the neighboring directions. - FLAG_SKIP_INTRA_DIRMISMATCH = 1 << 4, - - // Skips intra modes other than DC_PRED if the source variance is small - FLAG_SKIP_INTRA_LOWVAR = 1 << 5, -} MODE_SEARCH_SKIP_LOGIC; - -typedef enum { - SUBPEL_TREE = 0, - // Other methods to come -} SUBPEL_SEARCH_METHODS; - -typedef enum { - LAST_FRAME_PARTITION_OFF = 0, - LAST_FRAME_PARTITION_LOW_MOTION = 1, - LAST_FRAME_PARTITION_ALL = 2 -} LAST_FRAME_PARTITION_METHOD; - -typedef enum { - // No recode. - DISALLOW_RECODE = 0, - // Allow recode for KF and exceeding maximum frame bandwidth. - ALLOW_RECODE_KFMAXBW = 1, - // Allow recode only for KF/ARF/GF frames. - ALLOW_RECODE_KFARFGF = 2, - // Allow recode for all frames based on bitrate constraints. - ALLOW_RECODE = 3, -} RECODE_LOOP_TYPE; - -typedef enum { // encode_breakout is disabled. 
ENCODE_BREAKOUT_DISABLED = 0, // encode_breakout is enabled. @@ -192,225 +125,6 @@ typedef enum { } ENCODE_BREAKOUT_TYPE; typedef enum { - // Search partitions using RD/NONRD criterion - SEARCH_PARTITION = 0, - - // Always use a fixed size partition - FIXED_PARTITION = 1, - - // Use a fixed size partition in every 64X64 SB, where the size is - // determined based on source variance - VAR_BASED_FIXED_PARTITION = 2, - - REFERENCE_PARTITION = 3, - - // Use an arbitrary partitioning scheme based on source variance within - // a 64X64 SB - VAR_BASED_PARTITION -} PARTITION_SEARCH_TYPE; - -typedef struct { - // Frame level coding parameter update - int frame_parameter_update; - - // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). - SEARCH_METHODS search_method; - - RECODE_LOOP_TYPE recode_loop; - - // Subpel_search_method can only be subpel_tree which does a subpixel - // logarithmic search that keeps stepping at 1/2 pixel units until - // you stop getting a gain, and then goes on to 1/4 and repeats - // the same process. Along the way it skips many diagonals. - SUBPEL_SEARCH_METHODS subpel_search_method; - - // Maximum number of steps in logarithmic subpel search before giving up. - int subpel_iters_per_step; - - // Control when to stop subpel search - int subpel_force_stop; - - // Thresh_mult is used to set a threshold for the rd score. A higher value - // means that we will accept the best mode so far more often. This number - // is used in combination with the current block size, and thresh_freq_fact - // to pick a threshold. - int thresh_mult[MAX_MODES]; - int thresh_mult_sub8x8[MAX_REFS]; - - // This parameter controls the number of steps we'll do in a diamond - // search. - int max_step_search_steps; - - // This parameter controls which step in the n-step process we start at. - // It's changed adaptively based on circumstances. 
- int reduce_first_step_size; - - // If this is set to 1, we limit the motion search range to 2 times the - // largest motion vector found in the last frame. - int auto_mv_step_size; - - // Trellis (dynamic programming) optimization of quantized values (+1, 0). - int optimize_coefficients; - - // Always set to 0. If on it enables 0 cost background transmission - // (except for the initial transmission of the segmentation). The feature is - // disabled because the addition of very large block sizes make the - // backgrounds very to cheap to encode, and the segmentation we have - // adds overhead. - int static_segmentation; - - // If 1 we iterate finding a best reference for 2 ref frames together - via - // a log search that iterates 4 times (check around mv for last for best - // error of combined predictor then check around mv for alt). If 0 we - // we just use the best motion vector found for each frame by itself. - int comp_inter_joint_search_thresh; - - // This variable is used to cap the maximum number of times we skip testing a - // mode to be evaluated. A high value means we will be faster. - int adaptive_rd_thresh; - - // Enables skipping the reconstruction step (idct, recon) in the - // intermediate steps assuming the last frame didn't have too many intra - // blocks and the q is less than a threshold. - int skip_encode_sb; - int skip_encode_frame; - - // This variable allows us to reuse the last frames partition choices - // (64x64 v 32x32 etc) for this frame. It can be set to only use the last - // frame as a starting point in low motion scenes or always use it. If set - // we use last partitioning_redo frequency to determine how often to redo - // the partitioning from scratch. Adjust_partitioning_from_last_frame - // enables us to adjust up or down one partitioning from the last frames - // partitioning. - LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning; - - // Determine which method we use to determine transform size. 
We can choose - // between options like full rd, largest for prediction size, largest - // for intra and model coefs for the rest. - TX_SIZE_SEARCH_METHOD tx_size_search_method; - - // Low precision 32x32 fdct keeps everything in 16 bits and thus is less - // precise but significantly faster than the non lp version. - int use_lp32x32fdct; - - // TODO(JBB): remove this as its no longer used. - - // After looking at the first set of modes (set by index here), skip - // checking modes for reference frames that don't match the reference frame - // of the best so far. - int mode_skip_start; - - // TODO(JBB): Remove this. - int reference_masking; - - PARTITION_SEARCH_TYPE partition_search_type; - - // Used if partition_search_type = FIXED_SIZE_PARTITION - BLOCK_SIZE always_this_block_size; - - // Skip rectangular partition test when partition type none gives better - // rd than partition type split. - int less_rectangular_check; - - // Disable testing non square partitions. (eg 16x32) - int use_square_partition_only; - - // Sets min and max partition sizes for this 64x64 region based on the - // same 64x64 in last encoded frame, and the left and above neighbor. - AUTO_MIN_MAX_MODE auto_min_max_partition_size; - - // Min and max partition size we enable (block_size) as per auto - // min max, but also used by adjust partitioning, and pick_partitioning. - BLOCK_SIZE min_partition_size; - BLOCK_SIZE max_partition_size; - - // Whether or not we allow partitions one smaller or one greater than the last - // frame's partitioning. Only used if use_lastframe_partitioning is set. - int adjust_partitioning_from_last_frame; - - // How frequently we re do the partitioning from scratch. Only used if - // use_lastframe_partitioning is set. - int last_partitioning_redo_frequency; - - // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable - // it always, to allow it for only Last frame and Intra, disable it for all - // inter modes or to enable it always. 
- int disable_split_mask; - - // TODO(jingning): combine the related motion search speed features - // This allows us to use motion search at other sizes as a starting - // point for this motion search and limits the search range around it. - int adaptive_motion_search; - - // Allows sub 8x8 modes to use the prediction filter that was determined - // best for 8x8 mode. If set to 0 we always re check all the filters for - // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter - // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected. - int adaptive_pred_interp_filter; - - // Search through variable block partition types in non-RD mode decision - // encoding process for RTC. - int partition_check; - - // Implements various heuristics to skip searching modes - // The heuristics selected are based on flags - // defined in the MODE_SEARCH_SKIP_HEURISTICS enum - unsigned int mode_search_skip_flags; - - // A source variance threshold below which the split mode is disabled - unsigned int disable_split_var_thresh; - - // A source variance threshold below which filter search is disabled - // Choose a very large value (UINT_MAX) to use 8-tap always - unsigned int disable_filter_search_var_thresh; - - // These bit masks allow you to enable or disable intra modes for each - // transform size separately. - int intra_y_mode_mask[TX_SIZES]; - int intra_uv_mode_mask[TX_SIZES]; - - // This variable enables an early break out of mode testing if the model for - // rd built from the prediction signal indicates a value that's much - // higher than the best rd we've seen so far. - int use_rd_breakout; - - // This enables us to use an estimate for intra rd based on dc mode rather - // than choosing an actual uv mode in the stage of encoding before the actual - // final encode. - int use_uv_intra_rd_estimate; - - // This feature controls how the loop filter level is determined: - // 0: Try the full image with different values. 
- // 1: Try a small portion of the image with different values. - // 2: Estimate the level based on quantizer and frame type - int use_fast_lpf_pick; - - // This feature limits the number of coefficients updates we actually do - // by only looking at counts from 1/2 the bands. - int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced - - // This flag controls the use of non-RD mode decision. - int use_nonrd_pick_mode; - - // This variable sets the encode_breakout threshold. Currently, it is only - // enabled in real time mode. - int encode_breakout_thresh; - - // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV - // modes are disabled in order from LSB to MSB for each BLOCK_SIZE. - int disable_inter_mode_mask[BLOCK_SIZES]; - - // This feature controls whether we do the expensive context update and - // calculation in the rd coefficient costing loop. - int use_fast_coef_costing; - - // This variable controls the maximum block size where intra blocks can be - // used in inter frames. - // TODO(aconverse): Fold this into one of the other many mode skips - BLOCK_SIZE max_intra_bsize; -} SPEED_FEATURES; - -typedef enum { NORMAL = 0, FOURFIVE = 1, THREEFIVE = 2, @@ -418,44 +132,12 @@ typedef enum { } VPX_SCALING; typedef enum { - VP9_LAST_FLAG = 1 << 0, - VP9_GOLD_FLAG = 1 << 1, - VP9_ALT_FLAG = 1 << 2, -} VP9_REFFRAME; - -typedef enum { USAGE_LOCAL_FILE_PLAYBACK = 0, USAGE_STREAM_FROM_SERVER = 1, USAGE_CONSTRAINED_QUALITY = 2, USAGE_CONSTANT_QUALITY = 3, } END_USAGE; -typedef struct { - // Target percentage of blocks per frame that are cyclicly refreshed. - int max_mbs_perframe; - // Maximum q-delta as percentage of base q. - int max_qdelta_perc; - // Block size below which we don't apply cyclic refresh. - BLOCK_SIZE min_block_size; - // Macroblock starting index (unit of 8x8) for cycling through the frame. - int mb_index; - // Controls how long a block will need to wait to be refreshed again. 
- int time_for_refresh; - // Actual number of blocks that were applied delta-q (segment 1). - int num_seg_blocks; - // Actual encoding bits for segment 1. - int actual_seg_bits; - // RD mult. parameters for segment 1. - int rdmult; - // Cyclic refresh map. - signed char *map; - // Projected rate and distortion for the current superblock. - int64_t projected_rate_sb; - int64_t projected_dist_sb; - // Thresholds applied to projected rate/distortion of the superblock. - int64_t thresh_rate_sb; - int64_t thresh_dist_sb; -} CYCLIC_REFRESH; typedef enum { // Good Quality Fast Encoding. The encoder balances quality with the // amount of time it takes to encode the output. (speed setting @@ -503,10 +185,9 @@ typedef enum { AQ_MODE_COUNT // This should always be the last member of the enum } AQ_MODE; -typedef struct { - int version; // 4 versions of bitstream defined: - // 0 - best quality/slowest decode, - // 3 - lowest quality/fastest decode +typedef struct VP9_CONFIG { + BITSTREAM_PROFILE profile; + BIT_DEPTH bit_depth; int width; // width of data passed to the compressor int height; // height of data passed to the compressor double framerate; // set to passed in framerate @@ -550,6 +231,9 @@ typedef struct { int lossless; AQ_MODE aq_mode; // Adaptive Quantization mode + // Enable feature to reduce the frame quantization every x frames. 
+ int frame_periodic_boost; + // two pass datarate control int two_pass_vbrbias; // two pass datarate control tweaks int two_pass_vbrmin_section; @@ -598,23 +282,7 @@ typedef struct { } VP9_CONFIG; typedef struct VP9_COMP { - DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, y_round[QINDEX_RANGE][8]); - - DECLARE_ALIGNED(16, int16_t, uv_quant[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, uv_quant_shift[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, uv_zbin[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]); - -#if CONFIG_ALPHA - DECLARE_ALIGNED(16, int16_t, a_quant[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, a_quant_shift[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, a_zbin[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, a_round[QINDEX_RANGE][8]); -#endif - + QUANTS quants; MACROBLOCK mb; VP9_COMMON common; VP9_CONFIG oxcf; @@ -625,10 +293,14 @@ typedef struct VP9_COMP { #else struct lookahead_entry *alt_ref_source; #endif + struct lookahead_entry *last_source; YV12_BUFFER_CONFIG *Source; + YV12_BUFFER_CONFIG *Last_Source; // NULL for first frame and alt_ref frames YV12_BUFFER_CONFIG *un_scaled_source; YV12_BUFFER_CONFIG scaled_source; + YV12_BUFFER_CONFIG *unscaled_last_source; + YV12_BUFFER_CONFIG scaled_last_source; int key_frame_frequency; @@ -671,6 +343,13 @@ typedef struct VP9_COMP { // Ambient reconstruction err target for force key frames int ambient_err; + // Thresh_mult is used to set a threshold for the rd score. A higher value + // means that we will accept the best mode so far more often. This number + // is used in combination with the current block size, and thresh_freq_fact + // to pick a threshold. 
+ int rd_thresh_mult[MAX_MODES]; + int rd_thresh_mult_sub8x8[MAX_REFS]; + int rd_threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES]; int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; int rd_thresh_sub8x8[MAX_SEGMENTS][BLOCK_SIZES][MAX_REFS]; @@ -708,14 +387,12 @@ typedef struct VP9_COMP { vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES]; vp9_coeff_probs_model frame_coef_probs[TX_SIZES][PLANE_TYPES]; - vp9_coeff_stats frame_branch_ct[TX_SIZES][PLANE_TYPES]; struct vpx_codec_pkt_list *output_pkt_list; MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS]; int mbgraph_n_frames; // number of frames filled in the above int static_mb_pct; // % forced skip mbs by segmentation - int seg0_progress, seg0_idx, seg0_cnt; // for real time encoding int speed; @@ -747,7 +424,7 @@ typedef struct VP9_COMP { unsigned char *active_map; unsigned int active_map_enabled; - CYCLIC_REFRESH cyclic_refresh; + CYCLIC_REFRESH *cyclic_refresh; fractional_mv_step_fp *find_fractional_mv_step; fractional_mv_step_comp_fp *find_fractional_mv_step_comp; @@ -805,10 +482,6 @@ typedef struct VP9_COMP { unsigned int activity_avg; unsigned int *mb_activity_map; int *mb_norm_activity_map; - int output_partition; - - // Force next frame to intra when kf_auto says so. - int force_next_frame_intra; int droppable; @@ -823,6 +496,8 @@ typedef struct VP9_COMP { SVC svc; + int use_large_partition_rate; + #if CONFIG_MULTIPLE_ARF // ARF tracking variables. 
int multi_arf_enabled; @@ -840,13 +515,6 @@ typedef struct VP9_COMP { // Debug / test stats int64_t mode_test_hits[BLOCK_SIZES]; #endif - - // Y,U,V,(A) - ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; - ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16]; - - PARTITION_CONTEXT *above_seg_context; - PARTITION_CONTEXT left_seg_context[8]; } VP9_COMP; void vp9_initialize_enc(); @@ -854,7 +522,7 @@ void vp9_initialize_enc(); struct VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf); void vp9_remove_compressor(VP9_COMP *cpi); -void vp9_change_config(VP9_COMP *cpi, VP9_CONFIG *oxcf); +void vp9_change_config(VP9_COMP *cpi, const VP9_CONFIG *oxcf); // receive a frames worth of data. caller can assume that a copy of this // frame is made and not just a copy of the pointer.. @@ -903,8 +571,8 @@ void vp9_set_svc(VP9_COMP *cpi, int use_svc); int vp9_get_quantizer(struct VP9_COMP *cpi); -static int get_ref_frame_idx(const VP9_COMP *cpi, - MV_REFERENCE_FRAME ref_frame) { +static INLINE int get_ref_frame_idx(const VP9_COMP *cpi, + MV_REFERENCE_FRAME ref_frame) { if (ref_frame == LAST_FRAME) { return cpi->lst_fb_idx; } else if (ref_frame == GOLDEN_FRAME) { @@ -914,39 +582,45 @@ static int get_ref_frame_idx(const VP9_COMP *cpi, } } -static YV12_BUFFER_CONFIG *get_ref_frame_buffer(VP9_COMP *cpi, - MV_REFERENCE_FRAME ref_frame) { - VP9_COMMON *const cm = &cpi->common; - return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, - ref_frame)]].buf; +static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( + VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { + VP9_COMMON * const cm = &cpi->common; + return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]] + .buf; } -void vp9_set_speed_features(VP9_COMP *cpi); - -int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *reference); +// Intra only frames, golden frames (except alt ref overlays) and +// alt ref frames tend to be coded at a higher than ambient quality +static INLINE int 
vp9_frame_is_boosted(const VP9_COMP *cpi) { + return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame || + (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref); +} -void vp9_alloc_compressor_data(VP9_COMP *cpi); +static INLINE int get_token_alloc(int mb_rows, int mb_cols) { + // TODO(JBB): make this work for alpha channel and double check we can't + // exceed this token count if we have a 32x32 transform crossing a boundary + // at a multiple of 16. + // mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full + // resolution. We assume up to 1 token per pixel, and then allow + // a head room of 4. + return mb_rows * mb_cols * (16 * 16 * 3 + 4); +} -int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget); +int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b); -int vp9_compute_qdelta_by_rate(VP9_COMP *cpi, int base_q_index, - double rate_target_ratio); +void vp9_alloc_compressor_data(VP9_COMP *cpi); void vp9_scale_references(VP9_COMP *cpi); void vp9_update_reference_frames(VP9_COMP *cpi); -static int get_token_alloc(int mb_rows, int mb_cols) { - return mb_rows * mb_cols * (48 * 16 + 4); -} - extern const int q_trans[]; int64_t vp9_rescale(int64_t val, int64_t num, int denom); -static void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, - MV_REFERENCE_FRAME ref0, MV_REFERENCE_FRAME ref1) { +static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, + MV_REFERENCE_FRAME ref0, + MV_REFERENCE_FRAME ref1) { xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME : 0]; xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? 
ref1 - LAST_FRAME diff --git a/libvpx/vp9/encoder/vp9_picklpf.c b/libvpx/vp9/encoder/vp9_picklpf.c index b5f4901..3ac8522 100644 --- a/libvpx/vp9/encoder/vp9_picklpf.c +++ b/libvpx/vp9/encoder/vp9_picklpf.c @@ -10,16 +10,18 @@ #include <assert.h> #include <limits.h> + +#include "./vpx_scale_rtcd.h" + +#include "vpx_mem/vpx_mem.h" + +#include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_quant_common.h" + #include "vp9/encoder/vp9_onyx_int.h" #include "vp9/encoder/vp9_picklpf.h" #include "vp9/encoder/vp9_quantize.h" -#include "vp9/common/vp9_quant_common.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_scale/vpx_scale.h" -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_loopfilter.h" -#include "./vpx_scale_rtcd.h" static int get_max_filter_level(VP9_COMP *cpi) { return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4 @@ -28,12 +30,12 @@ static int get_max_filter_level(VP9_COMP *cpi) { static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, - MACROBLOCKD *const xd, VP9_COMMON *const cm, int filt_level, int partial_frame) { + VP9_COMMON *const cm = &cpi->common; int filt_err; - vp9_loop_filter_frame(cm, xd, filt_level, 1, partial_frame); - filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); + vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_level, 1, partial_frame); + filt_err = vp9_get_y_sse(sd, cm->frame_to_show); // Re-instate the unfiltered frame vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); @@ -43,7 +45,6 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial_frame) { - MACROBLOCKD *const xd = &cpi->mb.e_mbd; VP9_COMMON *const cm = &cpi->common; struct loopfilter *const lf = &cm->lf; const int min_filter_level = 0; @@ -64,7 +65,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, // Make a copy of the unfiltered 
/ processed recon buffer vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf); - best_err = try_filter_frame(sd, cpi, xd, cm, filt_mid, partial_frame); + best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame); filt_best = filt_mid; ss_err[filt_mid] = best_err; @@ -86,7 +87,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, if (filt_direction <= 0 && filt_low != filt_mid) { // Get Low filter error score if (ss_err[filt_low] < 0) { - filt_err = try_filter_frame(sd, cpi, xd, cm, filt_low, partial_frame); + filt_err = try_filter_frame(sd, cpi, filt_low, partial_frame); ss_err[filt_low] = filt_err; } else { filt_err = ss_err[filt_low]; @@ -105,7 +106,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, // Now look at filt_high if (filt_direction >= 0 && filt_high != filt_mid) { if (ss_err[filt_high] < 0) { - filt_err = try_filter_frame(sd, cpi, xd, cm, filt_high, partial_frame); + filt_err = try_filter_frame(sd, cpi, filt_high, partial_frame); ss_err[filt_high] = filt_err; } else { filt_err = ss_err[filt_high]; @@ -119,7 +120,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, // Half the step distance if the best filter value was the same as last time if (filt_best == filt_mid) { - filter_step = filter_step / 2; + filter_step /= 2; filt_direction = 0; } else { filt_direction = (filt_best < filt_mid) ? -1 : 1; @@ -131,25 +132,24 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, } void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, - int method) { + LPF_PICK_METHOD method) { VP9_COMMON *const cm = &cpi->common; struct loopfilter *const lf = &cm->lf; lf->sharpness_level = cm->frame_type == KEY_FRAME ? 
0 : cpi->oxcf.sharpness; - if (method == 2) { + if (method == LPF_PICK_FROM_Q) { const int min_filter_level = 0; const int max_filter_level = get_max_filter_level(cpi); const int q = vp9_ac_quant(cm->base_qindex, 0); // These values were determined by linear fitting the result of the - // searched level - // filt_guess = q * 0.316206 + 3.87252 - int filt_guess = (q * 20723 + 1015158 + (1 << 17)) >> 18; + // searched level, filt_guess = q * 0.316206 + 3.87252 + int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18); if (cm->frame_type == KEY_FRAME) filt_guess -= 4; lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level); } else { - search_filter_level(sd, cpi, method == 1); + search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE); } } diff --git a/libvpx/vp9/encoder/vp9_picklpf.h b/libvpx/vp9/encoder/vp9_picklpf.h index 203ef87..7d08ddb 100644 --- a/libvpx/vp9/encoder/vp9_picklpf.h +++ b/libvpx/vp9/encoder/vp9_picklpf.h @@ -16,11 +16,13 @@ extern "C" { #endif +#include "vp9/encoder/vp9_onyx_int.h" + struct yv12_buffer_config; struct VP9_COMP; void vp9_pick_filter_level(const struct yv12_buffer_config *sd, - struct VP9_COMP *cpi, int method); + struct VP9_COMP *cpi, LPF_PICK_METHOD method); #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vp9/encoder/vp9_pickmode.c b/libvpx/vp9/encoder/vp9_pickmode.c index 6c84144..f3fe99c 100644 --- a/libvpx/vp9/encoder/vp9_pickmode.c +++ b/libvpx/vp9/encoder/vp9_pickmode.c @@ -29,9 +29,9 @@ static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, BLOCK_SIZE bsize, int mi_row, int mi_col, - int_mv *tmp_mv) { + int_mv *tmp_mv, int *rate_mv) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; int step_param; int sadpb = x->sadperbit16; @@ -76,8 +76,11 @@ static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, return; } } - - 
mvp_full = mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv; + assert(x->mv_best_ref_index[ref] <= 2); + if (x->mv_best_ref_index[ref] < 2) + mvp_full = mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv; + else + mvp_full = x->pred_mv[ref].as_mv; mvp_full.col >>= 3; mvp_full.row >>= 3; @@ -125,14 +128,20 @@ static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; } + + // calculate the bit cost on motion vector + mvp_full.row = tmp_mv->as_mv.row * 8; + mvp_full.col = tmp_mv->as_mv.col * 8; + *rate_mv = vp9_mv_bit_cost(&mvp_full, &ref_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); } static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, BLOCK_SIZE bsize, int mi_row, int mi_col, - MV *tmp_mv, int *rate_mv) { + MV *tmp_mv) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; int ref = mbmi->ref_frame[0]; MV ref_mv = mbmi->ref_mvs[ref][0].as_mv; @@ -160,15 +169,13 @@ static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref]); - // calculate the bit cost on motion vector - *rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv, - x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); - if (scaled_ref_frame) { int i; for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; } + + x->pred_mv[ref].as_mv = *tmp_mv; } static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, @@ -183,14 +190,12 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; - const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); - int var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, - pd->dst.buf, pd->dst.stride, &sse); + int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride, + 
pd->dst.buf, pd->dst.stride, &sse); - vp9_model_rd_from_var_lapndz(var + sse, 1 << num_pels_log2_lookup[bs], + vp9_model_rd_from_var_lapndz(sse + var, 1 << num_pels_log2_lookup[bsize], pd->dequant[1] >> 3, &rate, &dist); - *out_rate_sum = rate; *out_dist_sum = dist << 3; } @@ -204,12 +209,12 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t *returndistortion, BLOCK_SIZE bsize) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; - const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); MB_PREDICTION_MODE this_mode, best_mode = ZEROMV; MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME; + INTERP_FILTER best_pred_filter = EIGHTTAP; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; struct buf_2d yv12_mb[4][MAX_MB_PLANE]; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, @@ -227,6 +232,13 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, intra_cost_penalty, 0); const int64_t intra_mode_cost = 50; + unsigned char segment_id = mbmi->segment_id; + const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize]; + const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize]; + // Mode index conversion from MB_PREDICTION_MODE to THR_MODES for a ref frame. + int mode_idx[MB_MODE_COUNT] = {0}; + INTERP_FILTER filter_ref = SWITCHABLE; + x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; x->skip = 0; @@ -244,20 +256,24 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interp_filter = cpi->common.interp_filter == SWITCHABLE ? 
EIGHTTAP : cpi->common.interp_filter; mbmi->skip = 0; - mbmi->segment_id = 0; - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); + mbmi->segment_id = segment_id; for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; if (cpi->ref_frame_flags & flag_list[ref_frame]) { vp9_setup_buffer_inter(cpi, x, tile, - ref_frame, block_size, mi_row, mi_col, + ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; } + if (xd->up_available) + filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter; + else if (xd->left_available) + filter_ref = xd->mi[-1]->mbmi.interp_filter; + for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) { if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue; @@ -270,6 +286,14 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->ref_frame[0] = ref_frame; + // Set conversion index for LAST_FRAME. 
+ if (ref_frame == LAST_FRAME) { + mode_idx[NEARESTMV] = THR_NEARESTMV; // LAST_FRAME, NEARESTMV + mode_idx[NEARMV] = THR_NEARMV; // LAST_FRAME, NEARMV + mode_idx[ZEROMV] = THR_ZEROMV; // LAST_FRAME, ZEROMV + mode_idx[NEWMV] = THR_NEWMV; // LAST_FRAME, NEWMV + } + for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { int rate_mv = 0; @@ -277,18 +301,29 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, (1 << INTER_OFFSET(this_mode))) continue; + if (best_rd < ((int64_t)rd_threshes[mode_idx[this_mode]] * + rd_thresh_freq_fact[this_mode] >> 5) || + rd_threshes[mode_idx[this_mode]] == INT_MAX) + continue; + if (this_mode == NEWMV) { + int rate_mode = 0; if (this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize])) continue; full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col, - &frame_mv[NEWMV][ref_frame]); + &frame_mv[NEWMV][ref_frame], &rate_mv); if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV) continue; + rate_mode = x->inter_mode_cost[mbmi->mode_context[ref_frame]] + [INTER_OFFSET(this_mode)]; + if (RDCOST(x->rdmult, x->rddiv, rate_mv + rate_mode, 0) > best_rd) + continue; + sub_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col, - &frame_mv[NEWMV][ref_frame].as_mv, &rate_mv); + &frame_mv[NEWMV][ref_frame].as_mv); } if (this_mode != NEARESTMV) @@ -298,9 +333,63 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->mode = this_mode; mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int; - vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); - model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist); + // Search for the best prediction filter type, when the resulting + // motion vector is at sub-pixel accuracy level for luma component, i.e., + // the last three bits are not all zeros. 
+ if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && + ((mbmi->mv[0].as_mv.row & 0x07) != 0 || + (mbmi->mv[0].as_mv.col & 0x07) != 0)) { + int64_t tmp_rdcost1 = INT64_MAX; + int64_t tmp_rdcost2 = INT64_MAX; + int64_t tmp_rdcost3 = INT64_MAX; + int pf_rate[3]; + int64_t pf_dist[3]; + + mbmi->interp_filter = EIGHTTAP; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP], + &pf_dist[EIGHTTAP]); + tmp_rdcost1 = RDCOST(x->rdmult, x->rddiv, + vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP], + pf_dist[EIGHTTAP]); + + mbmi->interp_filter = EIGHTTAP_SHARP; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SHARP], + &pf_dist[EIGHTTAP_SHARP]); + tmp_rdcost2 = RDCOST(x->rdmult, x->rddiv, + vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP_SHARP], + pf_dist[EIGHTTAP_SHARP]); + + mbmi->interp_filter = EIGHTTAP_SMOOTH; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SMOOTH], + &pf_dist[EIGHTTAP_SMOOTH]); + tmp_rdcost3 = RDCOST(x->rdmult, x->rddiv, + vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP_SMOOTH], + pf_dist[EIGHTTAP_SMOOTH]); + + if (tmp_rdcost2 < tmp_rdcost1) { + if (tmp_rdcost2 < tmp_rdcost3) + mbmi->interp_filter = EIGHTTAP_SHARP; + else + mbmi->interp_filter = EIGHTTAP_SMOOTH; + } else { + if (tmp_rdcost1 < tmp_rdcost3) + mbmi->interp_filter = EIGHTTAP; + else + mbmi->interp_filter = EIGHTTAP_SMOOTH; + } + + rate = pf_rate[mbmi->interp_filter]; + dist = pf_dist[mbmi->interp_filter]; + } else { + mbmi->interp_filter = (filter_ref == SWITCHABLE) ? 
EIGHTTAP: filter_ref; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist); + } + rate += rate_mv; rate += x->inter_mode_cost[mbmi->mode_context[ref_frame]] [INTER_OFFSET(this_mode)]; @@ -311,15 +400,17 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = rate; *returndistortion = dist; best_mode = this_mode; + best_pred_filter = mbmi->interp_filter; best_ref_frame = ref_frame; } } } mbmi->mode = best_mode; + mbmi->interp_filter = best_pred_filter; mbmi->ref_frame[0] = best_ref_frame; mbmi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int; - xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; + xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; // Perform intra prediction search, if the best SAD is above a certain // threshold. diff --git a/libvpx/vp9/encoder/vp9_quantize.c b/libvpx/vp9/encoder/vp9_quantize.c index 4ab8995..c092ee4 100644 --- a/libvpx/vp9/encoder/vp9_quantize.c +++ b/libvpx/vp9/encoder/vp9_quantize.c @@ -153,6 +153,7 @@ static void invert_quant(int16_t *quant, int16_t *shift, int d) { void vp9_init_quantizer(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; + QUANTS *const quants = &cpi->quants; int i, q, quant; for (q = 0; q < QINDEX_RANGE; q++) { @@ -163,48 +164,49 @@ void vp9_init_quantizer(VP9_COMP *cpi) { // y quant = i == 0 ? vp9_dc_quant(q, cm->y_dc_delta_q) : vp9_ac_quant(q, 0); - invert_quant(&cpi->y_quant[q][i], &cpi->y_quant_shift[q][i], quant); - cpi->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); - cpi->y_round[q][i] = (qrounding_factor * quant) >> 7; + invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i], quant); + quants->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); + quants->y_round[q][i] = (qrounding_factor * quant) >> 7; cm->y_dequant[q][i] = quant; // uv quant = i == 0 ? 
vp9_dc_quant(q, cm->uv_dc_delta_q) : vp9_ac_quant(q, cm->uv_ac_delta_q); - invert_quant(&cpi->uv_quant[q][i], &cpi->uv_quant_shift[q][i], quant); - cpi->uv_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); - cpi->uv_round[q][i] = (qrounding_factor * quant) >> 7; + invert_quant(&quants->uv_quant[q][i], + &quants->uv_quant_shift[q][i], quant); + quants->uv_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); + quants->uv_round[q][i] = (qrounding_factor * quant) >> 7; cm->uv_dequant[q][i] = quant; #if CONFIG_ALPHA // alpha quant = i == 0 ? vp9_dc_quant(q, cm->a_dc_delta_q) : vp9_ac_quant(q, cm->a_ac_delta_q); - invert_quant(&cpi->a_quant[q][i], &cpi->a_quant_shift[q][i], quant); - cpi->a_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); - cpi->a_round[q][i] = (qrounding_factor * quant) >> 7; + invert_quant(&quants->a_quant[q][i], &quants->a_quant_shift[q][i], quant); + quants->a_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); + quants->a_round[q][i] = (qrounding_factor * quant) >> 7; cm->a_dequant[q][i] = quant; #endif } for (i = 2; i < 8; i++) { - cpi->y_quant[q][i] = cpi->y_quant[q][1]; - cpi->y_quant_shift[q][i] = cpi->y_quant_shift[q][1]; - cpi->y_zbin[q][i] = cpi->y_zbin[q][1]; - cpi->y_round[q][i] = cpi->y_round[q][1]; + quants->y_quant[q][i] = quants->y_quant[q][1]; + quants->y_quant_shift[q][i] = quants->y_quant_shift[q][1]; + quants->y_zbin[q][i] = quants->y_zbin[q][1]; + quants->y_round[q][i] = quants->y_round[q][1]; cm->y_dequant[q][i] = cm->y_dequant[q][1]; - cpi->uv_quant[q][i] = cpi->uv_quant[q][1]; - cpi->uv_quant_shift[q][i] = cpi->uv_quant_shift[q][1]; - cpi->uv_zbin[q][i] = cpi->uv_zbin[q][1]; - cpi->uv_round[q][i] = cpi->uv_round[q][1]; + quants->uv_quant[q][i] = quants->uv_quant[q][1]; + quants->uv_quant_shift[q][i] = quants->uv_quant_shift[q][1]; + quants->uv_zbin[q][i] = quants->uv_zbin[q][1]; + quants->uv_round[q][i] = quants->uv_round[q][1]; cm->uv_dequant[q][i] = cm->uv_dequant[q][1]; #if CONFIG_ALPHA - 
cpi->a_quant[q][i] = cpi->a_quant[q][1]; - cpi->a_quant_shift[q][i] = cpi->a_quant_shift[q][1]; - cpi->a_zbin[q][i] = cpi->a_zbin[q][1]; - cpi->a_round[q][i] = cpi->a_round[q][1]; + quants->a_quant[q][i] = quants->a_quant[q][1]; + quants->a_quant_shift[q][i] = quants->a_quant_shift[q][1]; + quants->a_zbin[q][i] = quants->a_zbin[q][1]; + quants->a_round[q][i] = quants->a_round[q][1]; cm->a_dequant[q][i] = cm->a_dequant[q][1]; #endif } @@ -213,27 +215,28 @@ void vp9_init_quantizer(VP9_COMP *cpi) { void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { const VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - const int segment_id = xd->mi_8x8[0]->mbmi.segment_id; + MACROBLOCKD *const xd = &x->e_mbd; + QUANTS *const quants = &cpi->quants; + const int segment_id = xd->mi[0]->mbmi.segment_id; const int qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); const int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q); const int zbin = cpi->zbin_mode_boost + x->act_zbin_adj; int i; // Y - x->plane[0].quant = cpi->y_quant[qindex]; - x->plane[0].quant_shift = cpi->y_quant_shift[qindex]; - x->plane[0].zbin = cpi->y_zbin[qindex]; - x->plane[0].round = cpi->y_round[qindex]; + x->plane[0].quant = quants->y_quant[qindex]; + x->plane[0].quant_shift = quants->y_quant_shift[qindex]; + x->plane[0].zbin = quants->y_zbin[qindex]; + x->plane[0].round = quants->y_round[qindex]; x->plane[0].zbin_extra = (int16_t)((cm->y_dequant[qindex][1] * zbin) >> 7); xd->plane[0].dequant = cm->y_dequant[qindex]; // UV for (i = 1; i < 3; i++) { - x->plane[i].quant = cpi->uv_quant[qindex]; - x->plane[i].quant_shift = cpi->uv_quant_shift[qindex]; - x->plane[i].zbin = cpi->uv_zbin[qindex]; - x->plane[i].round = cpi->uv_round[qindex]; + x->plane[i].quant = quants->uv_quant[qindex]; + x->plane[i].quant_shift = quants->uv_quant_shift[qindex]; + x->plane[i].zbin = quants->uv_zbin[qindex]; + x->plane[i].round = quants->uv_round[qindex]; x->plane[i].zbin_extra = 
(int16_t)((cm->uv_dequant[qindex][1] * zbin) >> 7); xd->plane[i].dequant = cm->uv_dequant[qindex]; } @@ -273,9 +276,7 @@ void vp9_frame_init_quantizer(VP9_COMP *cpi) { vp9_init_plane_quantizers(cpi, &cpi->mb); } -void vp9_set_quantizer(struct VP9_COMP *cpi, int q) { - VP9_COMMON *const cm = &cpi->common; - +void vp9_set_quantizer(VP9_COMMON *cm, int q) { // quantizer has to be reinitialized with vp9_init_quantizer() if any // delta_q changes. cm->base_qindex = q; diff --git a/libvpx/vp9/encoder/vp9_quantize.h b/libvpx/vp9/encoder/vp9_quantize.h index f356b12..7d231df 100644 --- a/libvpx/vp9/encoder/vp9_quantize.h +++ b/libvpx/vp9/encoder/vp9_quantize.h @@ -17,12 +17,30 @@ extern "C" { #endif +typedef struct { + DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, y_round[QINDEX_RANGE][8]); + + DECLARE_ALIGNED(16, int16_t, uv_quant[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, uv_quant_shift[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, uv_zbin[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]); + +#if CONFIG_ALPHA + DECLARE_ALIGNED(16, int16_t, a_quant[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, a_quant_shift[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, a_zbin[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, a_round[QINDEX_RANGE][8]); +#endif +} QUANTS; + void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, const int16_t *scan, const int16_t *iscan); struct VP9_COMP; - -void vp9_set_quantizer(struct VP9_COMP *cpi, int q); +struct VP9Common; void vp9_frame_init_quantizer(struct VP9_COMP *cpi); @@ -32,6 +50,8 @@ void vp9_init_plane_quantizers(struct VP9_COMP *cpi, MACROBLOCK *x); void vp9_init_quantizer(struct VP9_COMP *cpi); +void vp9_set_quantizer(struct VP9Common *cm, int q); + #ifdef __cplusplus } // extern "C" #endif diff --git 
a/libvpx/vp9/encoder/vp9_ratectrl.c b/libvpx/vp9/encoder/vp9_ratectrl.c index edc48bb..3420816 100644 --- a/libvpx/vp9/encoder/vp9_ratectrl.c +++ b/libvpx/vp9/encoder/vp9_ratectrl.c @@ -35,9 +35,6 @@ #define MIN_BPB_FACTOR 0.005 #define MAX_BPB_FACTOR 50 -// Bits Per MB at different Q (Multiplied by 512) -#define BPER_MB_NORMBITS 9 - // Tables relating active max Q to active min Q static int kf_low_motion_minq[QINDEX_RANGE]; static int kf_high_motion_minq[QINDEX_RANGE]; @@ -55,10 +52,9 @@ static int kf_low = 400; // formulaic approach to facilitate easier adjustment of the Q tables. // The formulae were derived from computing a 3rd order polynomial best // fit to the original data (after plotting real maxq vs minq (not q index)) -static int calculate_minq_index(double maxq, - double x3, double x2, double x1, double c) { +static int get_minq_index(double maxq, double x3, double x2, double x1) { int i; - const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq + c, + const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq, maxq); // Special case handling to deal with the step from q2.0 @@ -66,57 +62,26 @@ static int calculate_minq_index(double maxq, if (minqtarget <= 2.0) return 0; - for (i = 0; i < QINDEX_RANGE; i++) { + for (i = 0; i < QINDEX_RANGE; i++) if (minqtarget <= vp9_convert_qindex_to_q(i)) return i; - } return QINDEX_RANGE - 1; } -void vp9_rc_init_minq_luts(void) { +void vp9_rc_init_minq_luts() { int i; for (i = 0; i < QINDEX_RANGE; i++) { const double maxq = vp9_convert_qindex_to_q(i); - - kf_low_motion_minq[i] = calculate_minq_index(maxq, - 0.000001, - -0.0004, - 0.15, - 0.0); - kf_high_motion_minq[i] = calculate_minq_index(maxq, - 0.000002, - -0.0012, - 0.50, - 0.0); - - gf_low_motion_minq[i] = calculate_minq_index(maxq, - 0.0000015, - -0.0009, - 0.32, - 0.0); - gf_high_motion_minq[i] = calculate_minq_index(maxq, - 0.0000021, - -0.00125, - 0.50, - 0.0); - afq_low_motion_minq[i] = calculate_minq_index(maxq, - 0.0000015, - -0.0009, 
- 0.33, - 0.0); - afq_high_motion_minq[i] = calculate_minq_index(maxq, - 0.0000021, - -0.00125, - 0.55, - 0.0); - inter_minq[i] = calculate_minq_index(maxq, - 0.00000271, - -0.00113, - 0.75, - 0.0); + kf_low_motion_minq[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.15); + kf_high_motion_minq[i] = get_minq_index(maxq, 0.000002, -0.0012, 0.50); + gf_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.32); + gf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50); + afq_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.33); + afq_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55); + inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.75); } } @@ -138,79 +103,10 @@ int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex, return (int)(0.5 + (enumerator * correction_factor / q)); } -void vp9_save_coding_context(VP9_COMP *cpi) { - CODING_CONTEXT *const cc = &cpi->coding_context; - VP9_COMMON *cm = &cpi->common; - - // Stores a snapshot of key state variables which can subsequently be - // restored with a call to vp9_restore_coding_context. These functions are - // intended for use in a re-code loop in vp9_compress_frame where the - // quantizer value is adjusted between loop iterations. - vp9_copy(cc->nmvjointcost, cpi->mb.nmvjointcost); - vp9_copy(cc->nmvcosts, cpi->mb.nmvcosts); - vp9_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp); - - vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs); - - vpx_memcpy(cpi->coding_context.last_frame_seg_map_copy, - cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols)); - - vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas); - vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas); - - cc->fc = cm->fc; -} - -void vp9_restore_coding_context(VP9_COMP *cpi) { - CODING_CONTEXT *const cc = &cpi->coding_context; - VP9_COMMON *cm = &cpi->common; - - // Restore key state variables to the snapshot state stored in the - // previous call to vp9_save_coding_context. 
- vp9_copy(cpi->mb.nmvjointcost, cc->nmvjointcost); - vp9_copy(cpi->mb.nmvcosts, cc->nmvcosts); - vp9_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp); - - vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs); - - vpx_memcpy(cm->last_frame_seg_map, - cpi->coding_context.last_frame_seg_map_copy, - (cm->mi_rows * cm->mi_cols)); - - vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas); - vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas); - - cm->fc = cc->fc; -} - -void vp9_setup_key_frame(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - - vp9_setup_past_independence(cm); - - /* All buffers are implicitly updated on key frames. */ - cpi->refresh_golden_frame = 1; - cpi->refresh_alt_ref_frame = 1; -} - -void vp9_setup_inter_frame(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - if (cm->error_resilient_mode || cm->intra_only) - vp9_setup_past_independence(cm); - - assert(cm->frame_context_idx < FRAME_CONTEXTS); - cm->fc = cm->frame_contexts[cm->frame_context_idx]; -} - -static int estimate_bits_at_q(int frame_kind, int q, int mbs, +static int estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs, double correction_factor) { - const int bpm = (int)(vp9_rc_bits_per_mb(frame_kind, q, correction_factor)); - - // Attempt to retain reasonable accuracy without overflow. The cutoff is - // chosen such that the maximum product of Bpm and MBs fits 31 bits. The - // largest Bpm takes 20 bits. - return (mbs > (1 << 11)) ? (bpm >> BPER_MB_NORMBITS) * mbs - : (bpm * mbs) >> BPER_MB_NORMBITS; + const int bpm = (int)(vp9_rc_bits_per_mb(frame_type, q, correction_factor)); + return ((uint64_t)bpm * mbs) >> BPER_MB_NORMBITS; } int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) { @@ -247,13 +143,12 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) { // Update the buffer level for higher layers, given the encoded current layer. 
-static void update_layer_buffer_level(VP9_COMP *const cpi, - int encoded_frame_size) { +static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) { int temporal_layer = 0; - int current_temporal_layer = cpi->svc.temporal_layer_id; + int current_temporal_layer = svc->temporal_layer_id; for (temporal_layer = current_temporal_layer + 1; - temporal_layer < cpi->svc.number_temporal_layers; ++temporal_layer) { - LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; + temporal_layer < svc->number_temporal_layers; ++temporal_layer) { + LAYER_CONTEXT *lc = &svc->layer_context[temporal_layer]; RATE_CONTROL *lrc = &lc->rc; int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate - encoded_frame_size); @@ -283,10 +178,60 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { rc->buffer_level = rc->bits_off_target; if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - update_layer_buffer_level(cpi, encoded_frame_size); + update_layer_buffer_level(&cpi->svc, encoded_frame_size); } } +void vp9_rc_init(const VP9_CONFIG *oxcf, int pass, RATE_CONTROL *rc) { + if (pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { + rc->avg_frame_qindex[0] = oxcf->worst_allowed_q; + rc->avg_frame_qindex[1] = oxcf->worst_allowed_q; + rc->avg_frame_qindex[2] = oxcf->worst_allowed_q; + } else { + rc->avg_frame_qindex[0] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + rc->avg_frame_qindex[1] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + rc->avg_frame_qindex[2] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + } + + rc->last_q[0] = oxcf->best_allowed_q; + rc->last_q[1] = oxcf->best_allowed_q; + rc->last_q[2] = oxcf->best_allowed_q; + + rc->buffer_level = oxcf->starting_buffer_level; + rc->bits_off_target = oxcf->starting_buffer_level; + + rc->rolling_target_bits = rc->av_per_frame_bandwidth; + rc->rolling_actual_bits = rc->av_per_frame_bandwidth; + rc->long_rolling_target_bits = 
rc->av_per_frame_bandwidth; + rc->long_rolling_actual_bits = rc->av_per_frame_bandwidth; + + rc->total_actual_bits = 0; + rc->total_target_vs_actual = 0; + + rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; + rc->frames_since_key = 8; // Sensible default for first frame. + rc->this_key_frame_forced = 0; + rc->next_key_frame_forced = 0; + rc->source_alt_ref_pending = 0; + rc->source_alt_ref_active = 0; + + rc->frames_till_gf_update_due = 0; + + rc->ni_av_qi = oxcf->worst_allowed_q; + rc->ni_tot_qi = 0; + rc->ni_frames = 0; + + rc->tot_q = 0.0; + rc->avg_q = vp9_convert_qindex_to_q(oxcf->worst_allowed_q); + + rc->rate_correction_factor = 1.0; + rc->key_frame_rate_correction_factor = 1.0; + rc->gf_rate_correction_factor = 1.0; +} + int vp9_rc_drop_frame(VP9_COMP *cpi) { const VP9_CONFIG *oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; @@ -330,6 +275,7 @@ static double get_rate_correction_factor(const VP9_COMP *cpi) { return cpi->rc.key_frame_rate_correction_factor; } else { if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && + !cpi->rc.is_src_frame_alt_ref && !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) return cpi->rc.gf_rate_correction_factor; else @@ -342,6 +288,7 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) { cpi->rc.key_frame_rate_correction_factor = factor; } else { if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && + !cpi->rc.is_src_frame_alt_ref && !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) cpi->rc.gf_rate_correction_factor = factor; else @@ -350,7 +297,7 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) { } void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { - const int q = cpi->common.base_qindex; + const VP9_COMMON *const cm = &cpi->common; int correction_factor = 100; double rate_correction_factor = get_rate_correction_factor(cpi); double adjustment_limit; @@ -363,8 +310,8 @@ void 
vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { // Work out how big we would have expected the frame to be at this Q given // the current correction factor. // Stay in double to avoid int overflow when values are large - projected_size_based_on_q = estimate_bits_at_q(cpi->common.frame_type, q, - cpi->common.MBs, + projected_size_based_on_q = estimate_bits_at_q(cm->frame_type, + cm->base_qindex, cm->MBs, rate_correction_factor); // Work out a size correction factor. if (projected_size_based_on_q > 0) @@ -388,20 +335,18 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { if (correction_factor > 102) { // We are not already at the worst allowable quality - correction_factor = - (int)(100 + ((correction_factor - 100) * adjustment_limit)); - rate_correction_factor = - ((rate_correction_factor * correction_factor) / 100); + correction_factor = (int)(100 + ((correction_factor - 100) * + adjustment_limit)); + rate_correction_factor = (rate_correction_factor * correction_factor) / 100; // Keep rate_correction_factor within limits if (rate_correction_factor > MAX_BPB_FACTOR) rate_correction_factor = MAX_BPB_FACTOR; } else if (correction_factor < 99) { // We are not already at the best allowable quality - correction_factor = - (int)(100 - ((100 - correction_factor) * adjustment_limit)); - rate_correction_factor = - ((rate_correction_factor * correction_factor) / 100); + correction_factor = (int)(100 - ((100 - correction_factor) * + adjustment_limit)); + rate_correction_factor = (rate_correction_factor * correction_factor) / 100; // Keep rate_correction_factor within limits if (rate_correction_factor < MIN_BPB_FACTOR) @@ -422,11 +367,8 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, // Calculate required scaling factor based on target frame size and size of // frame produced using previous Q. 
- if (target_bits_per_frame >= (INT_MAX >> BPER_MB_NORMBITS)) - // Case where we would overflow int - target_bits_per_mb = (target_bits_per_frame / cm->MBs) << BPER_MB_NORMBITS; - else - target_bits_per_mb = (target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs; + target_bits_per_mb = + ((uint64_t)target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs; i = active_best_quality; @@ -493,6 +435,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { // If buffer is below the optimal level, let the active_worst_quality go from // ambient Q (at buffer = optimal level) to worst_quality level // (at buffer = critical level). + const VP9_COMMON *const cm = &cpi->common; const VP9_CONFIG *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; // Buffer level below which we push active_worst to worst_quality. @@ -500,9 +443,9 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { int64_t buff_lvl_step = 0; int adjustment = 0; int active_worst_quality; - if (cpi->common.frame_type == KEY_FRAME) + if (cm->frame_type == KEY_FRAME) return rc->worst_quality; - if (cpi->common.current_video_frame > 1) + if (cm->current_video_frame > 1) active_worst_quality = MIN(rc->worst_quality, rc->avg_frame_qindex[INTER_FRAME] * 5 / 4); else @@ -556,7 +499,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, if (rc->this_key_frame_forced) { int qindex = rc->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex); - int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q, + int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, (last_boosted_q * 0.75)); active_best_quality = MAX(qindex + delta_qindex, rc->best_quality); } else if (cm->current_video_frame > 0) { @@ -578,8 +521,8 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, // Convert the adjustment factor to a qindex delta // on active_best_quality. 
q_val = vp9_convert_qindex_to_q(active_best_quality); - active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val * - q_adj_factor); + active_best_quality += vp9_compute_qdelta(rc, q_val, + q_val * q_adj_factor); } } else if (!rc->is_src_frame_alt_ref && !cpi->use_svc && @@ -635,7 +578,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, active_best_quality, active_worst_quality); if (q > *top_index) { // Special case when we are targeting the max allowed rate - if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth) + if (rc->this_frame_target >= rc->max_frame_bandwidth) *top_index = q; else q = *top_index; @@ -668,8 +611,8 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, if (rc->this_key_frame_forced) { int qindex = rc->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex); - int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q, - (last_boosted_q * 0.75)); + int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, + last_boosted_q * 0.75); active_best_quality = MAX(qindex + delta_qindex, rc->best_quality); } else if (cm->current_video_frame > 0) { // not first frame of one pass and kf_boost is set @@ -690,15 +633,15 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, // Convert the adjustment factor to a qindex delta // on active_best_quality. q_val = vp9_convert_qindex_to_q(active_best_quality); - active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val * - q_adj_factor); + active_best_quality += vp9_compute_qdelta(rc, q_val, + q_val * q_adj_factor); } #else double current_q; // Force the KF quantizer to be 30% of the active_worst_quality. 
current_q = vp9_convert_qindex_to_q(active_worst_quality); active_best_quality = active_worst_quality - + vp9_compute_qdelta(cpi, current_q, current_q * 0.3); + + vp9_compute_qdelta(rc, current_q, current_q * 0.3); #endif } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { @@ -801,7 +744,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, active_best_quality, active_worst_quality); if (q > *top_index) { // Special case when we are targeting the max allowed rate - if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth) + if (rc->this_frame_target >= rc->max_frame_bandwidth) *top_index = q; else q = *top_index; @@ -817,7 +760,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, assert(level >= 0); new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level))); q = active_worst_quality + - vp9_compute_qdelta(cpi, current_q, new_q); + vp9_compute_qdelta(rc, current_q, new_q); *bottom_index = q; *top_index = q; @@ -850,8 +793,8 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, if (rc->this_key_frame_forced) { int qindex = rc->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex); - int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q, - (last_boosted_q * 0.75)); + int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, + last_boosted_q * 0.75); active_best_quality = MAX(qindex + delta_qindex, rc->best_quality); } else { // Not forced keyframe. @@ -875,15 +818,15 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, // Convert the adjustment factor to a qindex delta // on active_best_quality. q_val = vp9_convert_qindex_to_q(active_best_quality); - active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val * - q_adj_factor); + active_best_quality += vp9_compute_qdelta(rc, q_val, + q_val * q_adj_factor); } #else double current_q; // Force the KF quantizer to be 30% of the active_worst_quality. 
current_q = vp9_convert_qindex_to_q(active_worst_quality); active_best_quality = active_worst_quality - + vp9_compute_qdelta(cpi, current_q, current_q * 0.3); + + vp9_compute_qdelta(rc, current_q, current_q * 0.3); #endif } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { @@ -984,7 +927,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, active_best_quality, active_worst_quality); if (q > *top_index) { // Special case when we are targeting the max allowed rate. - if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth) + if (rc->this_frame_target >= rc->max_frame_bandwidth) *top_index = q; else q = *top_index; @@ -1000,7 +943,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, assert(level >= 0); new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level))); q = active_worst_quality + - vp9_compute_qdelta(cpi, current_q, new_q); + vp9_compute_qdelta(rc, current_q, new_q); *bottom_index = q; *top_index = q; @@ -1016,8 +959,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, } int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, - int *bottom_index, - int *top_index) { + int *bottom_index, int *top_index) { int q; if (cpi->pass == 0) { if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) @@ -1028,14 +970,14 @@ int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index); } - // JBB : This is realtime mode. In real time mode the first frame - // should be larger. Q of 0 is disabled because we force tx size to be + // Q of 0 is disabled because we force tx size to be // 16x16... 
if (cpi->sf.use_nonrd_pick_mode) { - if (cpi->common.current_video_frame == 0) - q /= 3; if (q == 0) q++; + if (cpi->sf.force_frame_boost == 1) + q -= cpi->sf.max_delta_qindex; + if (q < *bottom_index) *bottom_index = q; else if (q > *top_index) @@ -1053,28 +995,14 @@ void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi, *frame_under_shoot_limit = 0; *frame_over_shoot_limit = INT_MAX; } else { - if (cpi->common.frame_type == KEY_FRAME) { - *frame_over_shoot_limit = this_frame_target * 9 / 8; - *frame_under_shoot_limit = this_frame_target * 7 / 8; - } else { - if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) { - *frame_over_shoot_limit = this_frame_target * 9 / 8; - *frame_under_shoot_limit = this_frame_target * 7 / 8; - } else { - // Stron overshoot limit for constrained quality - if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { - *frame_over_shoot_limit = this_frame_target * 11 / 8; - *frame_under_shoot_limit = this_frame_target * 2 / 8; - } else { - *frame_over_shoot_limit = this_frame_target * 11 / 8; - *frame_under_shoot_limit = this_frame_target * 5 / 8; - } - } - } + int recode_tolerance = + (cpi->sf.recode_tolerance * this_frame_target) / 100; + + *frame_over_shoot_limit = this_frame_target + recode_tolerance; + *frame_under_shoot_limit = this_frame_target - recode_tolerance; // For very small rate targets where the fractional adjustment - // (eg * 7/8) may be tiny make sure there is at least a minimum - // range. + // may be tiny make sure there is at least a minimum range. 
*frame_over_shoot_limit += 200; *frame_under_shoot_limit -= 200; if (*frame_under_shoot_limit < 0) @@ -1099,16 +1027,17 @@ void vp9_rc_set_frame_target(VP9_COMP *cpi, int target) { static void update_alt_ref_frame_stats(VP9_COMP *cpi) { // this frame refreshes means next frames don't unless specified by user - cpi->rc.frames_since_golden = 0; + RATE_CONTROL *const rc = &cpi->rc; + rc->frames_since_golden = 0; #if CONFIG_MULTIPLE_ARF if (!cpi->multi_arf_enabled) #endif // Clear the alternate reference update pending flag. - cpi->rc.source_alt_ref_pending = 0; + rc->source_alt_ref_pending = 0; // Set the alternate reference frame active flag - cpi->rc.source_alt_ref_active = 1; + rc->source_alt_ref_active = 1; } static void update_golden_frame_stats(VP9_COMP *cpi) { @@ -1137,6 +1066,7 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { VP9_COMMON *const cm = &cpi->common; + const VP9_CONFIG *const oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; cm->last_frame_type = cm->frame_type; @@ -1146,7 +1076,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { // Post encode loop adjustment of Q prediction. vp9_rc_update_rate_correction_factors( cpi, (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF || - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0); + oxcf->end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0); // Keep a record of last Q and ambient average Q. 
if (cm->frame_type == KEY_FRAME) { @@ -1155,7 +1085,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { 3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2); } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) && - !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) { + !(cpi->use_svc && oxcf->end_usage == USAGE_STREAM_FROM_SERVER)) { rc->last_q[2] = cm->base_qindex; rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO( 3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2); @@ -1201,12 +1131,11 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { // Actual bits spent rc->total_actual_bits += rc->projected_frame_size; + rc->total_target_bits += (cm->show_frame ? rc->av_per_frame_bandwidth : 0); - // Debug stats - rc->total_target_vs_actual += (rc->this_frame_target - - rc->projected_frame_size); + rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits; - if (cpi->oxcf.play_alternate && cpi->refresh_alt_ref_frame && + if (oxcf->play_alternate && cpi->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME)) // Update the alternate reference frame stats as appropriate. update_alt_ref_frame_stats(cpi); @@ -1239,15 +1168,15 @@ static int test_for_kf_one_pass(VP9_COMP *cpi) { static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) { static const int af_ratio = 10; - const RATE_CONTROL *rc = &cpi->rc; + const RATE_CONTROL *const rc = &cpi->rc; int target; #if USE_ALTREF_FOR_ONE_PASS target = (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) ? 
- (rc->av_per_frame_bandwidth * cpi->rc.baseline_gf_interval * af_ratio) / - (cpi->rc.baseline_gf_interval + af_ratio - 1) : - (rc->av_per_frame_bandwidth * cpi->rc.baseline_gf_interval) / - (cpi->rc.baseline_gf_interval + af_ratio - 1); + (rc->av_per_frame_bandwidth * rc->baseline_gf_interval * af_ratio) / + (rc->baseline_gf_interval + af_ratio - 1) : + (rc->av_per_frame_bandwidth * rc->baseline_gf_interval) / + (rc->baseline_gf_interval + af_ratio - 1); #else target = rc->av_per_frame_bandwidth; #endif @@ -1299,18 +1228,19 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { const VP9_CONFIG *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; + const SVC *const svc = &cpi->svc; const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level; const int64_t one_pct_bits = 1 + oxcf->optimal_buffer_level / 100; int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS); int target = rc->av_per_frame_bandwidth; - if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + if (svc->number_temporal_layers > 1 && + oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { // Note that for layers, av_per_frame_bandwidth is the cumulative // per-frame-bandwidth. For the target size of this frame, use the // layer average frame size (i.e., non-cumulative per-frame-bw). 
- int current_temporal_layer = cpi->svc.temporal_layer_id; - const LAYER_CONTEXT *lc = &cpi->svc.layer_context[current_temporal_layer]; + int current_temporal_layer = svc->temporal_layer_id; + const LAYER_CONTEXT *lc = &svc->layer_context[current_temporal_layer]; target = lc->avg_frame_size; min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS); } @@ -1347,13 +1277,14 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { void vp9_rc_get_svc_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; - int target = cpi->rc.av_per_frame_bandwidth; + RATE_CONTROL *const rc = &cpi->rc; + int target = rc->av_per_frame_bandwidth; if ((cm->current_video_frame == 0) || (cm->frame_flags & FRAMEFLAGS_KEY) || - (cpi->oxcf.auto_key && (cpi->rc.frames_since_key % + (cpi->oxcf.auto_key && (rc->frames_since_key % cpi->key_frame_frequency == 0))) { cm->frame_type = KEY_FRAME; - cpi->rc.source_alt_ref_active = 0; + rc->source_alt_ref_active = 0; if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { target = calc_iframe_target_size_one_pass_cbr(cpi); } @@ -1364,8 +1295,8 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { } } vp9_rc_set_frame_target(cpi, target); - cpi->rc.frames_till_gf_update_due = INT_MAX; - cpi->rc.baseline_gf_interval = INT_MAX; + rc->frames_till_gf_update_due = INT_MAX; + rc->baseline_gf_interval = INT_MAX; } void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { @@ -1392,3 +1323,46 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { rc->frames_till_gf_update_due = INT_MAX; rc->baseline_gf_interval = INT_MAX; } + +int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget) { + int start_index = rc->worst_quality; + int target_index = rc->worst_quality; + int i; + + // Convert the average q value to an index. 
+ for (i = rc->best_quality; i < rc->worst_quality; ++i) { + start_index = i; + if (vp9_convert_qindex_to_q(i) >= qstart) + break; + } + + // Convert the q target to an index + for (i = rc->best_quality; i < rc->worst_quality; ++i) { + target_index = i; + if (vp9_convert_qindex_to_q(i) >= qtarget) + break; + } + + return target_index - start_index; +} + +int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, + int qindex, double rate_target_ratio) { + int target_index = rc->worst_quality; + int i; + + // Look up the current projected bits per block for the base index + const int base_bits_per_mb = vp9_rc_bits_per_mb(frame_type, qindex, 1.0); + + // Find the target bits per mb based on the base value and given ratio. + const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb); + + // Convert the q target to an index + for (i = rc->best_quality; i < rc->worst_quality; ++i) { + target_index = i; + if (vp9_rc_bits_per_mb(frame_type, i, 1.0) <= target_bits_per_mb ) + break; + } + + return target_index - qindex; +} diff --git a/libvpx/vp9/encoder/vp9_ratectrl.h b/libvpx/vp9/encoder/vp9_ratectrl.h index ed6266f..7693c2b 100644 --- a/libvpx/vp9/encoder/vp9_ratectrl.h +++ b/libvpx/vp9/encoder/vp9_ratectrl.h @@ -22,6 +22,9 @@ extern "C" { #define FRAME_OVERHEAD_BITS 200 +// Bits Per MB at different Q (Multiplied by 512) +#define BPER_MB_NORMBITS 9 + typedef struct { // Rate targetting variables int this_frame_target; @@ -58,7 +61,7 @@ typedef struct { int ni_av_qi; int ni_tot_qi; int ni_frames; - int avg_frame_qindex[3]; // 0 - KEY, 1 - INTER, 2 - ARF/GF + int avg_frame_qindex[3]; // 0 - KEY, 1 - INTER, 2 - ARF/GF double tot_q; double avg_q; @@ -75,7 +78,8 @@ typedef struct { int long_rolling_actual_bits; int64_t total_actual_bits; - int total_target_vs_actual; // debug stats + int64_t total_target_bits; + int64_t total_target_vs_actual; int worst_quality; int best_quality; @@ -83,17 +87,13 @@ typedef struct { } RATE_CONTROL; struct 
VP9_COMP; +struct VP9_CONFIG; -void vp9_save_coding_context(struct VP9_COMP *cpi); -void vp9_restore_coding_context(struct VP9_COMP *cpi); - -void vp9_setup_key_frame(struct VP9_COMP *cpi); -void vp9_setup_inter_frame(struct VP9_COMP *cpi); +void vp9_rc_init(const struct VP9_CONFIG *oxcf, int pass, RATE_CONTROL *rc); double vp9_convert_qindex_to_q(int qindex); -// initialize luts for minq -void vp9_rc_init_minq_luts(void); +void vp9_rc_init_minq_luts(); // Generally at the high level, the following flow is expected // to be enforced for rate control: @@ -166,6 +166,15 @@ int vp9_rc_clamp_pframe_target_size(const struct VP9_COMP *const cpi, // This function is called only from the vp9_rc_get_..._params() functions. void vp9_rc_set_frame_target(struct VP9_COMP *cpi, int target); +// Computes a q delta (in "q index" terms) to get from a starting q value +// to a target q value +int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget); + +// Computes a q delta (in "q index" terms) to get from a starting q value +// to a value that should equate to the given rate ratio. +int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, + int qindex, double rate_target_ratio); + #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vp9/encoder/vp9_rdopt.c b/libvpx/vp9/encoder/vp9_rdopt.c index 2fd25ef..dcd2852 100644 --- a/libvpx/vp9/encoder/vp9_rdopt.c +++ b/libvpx/vp9/encoder/vp9_rdopt.c @@ -244,7 +244,6 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { static void set_block_thresholds(VP9_COMP *cpi) { const VP9_COMMON *const cm = &cpi->common; - const SPEED_FEATURES *const sf = &cpi->sf; int i, bsize, segment_id; for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) { @@ -261,13 +260,13 @@ static void set_block_thresholds(VP9_COMP *cpi) { for (i = 0; i < MAX_MODES; ++i) cpi->rd_threshes[segment_id][bsize][i] = - sf->thresh_mult[i] < thresh_max ? 
sf->thresh_mult[i] * t / 4 + cpi->rd_thresh_mult[i] < thresh_max ? cpi->rd_thresh_mult[i] * t / 4 : INT_MAX; for (i = 0; i < MAX_REFS; ++i) { cpi->rd_thresh_sub8x8[segment_id][bsize][i] = - sf->thresh_mult_sub8x8[i] < thresh_max - ? sf->thresh_mult_sub8x8[i] * t / 4 + cpi->rd_thresh_mult_sub8x8[i] < thresh_max + ? cpi->rd_thresh_mult_sub8x8[i] * t / 4 : INT_MAX; } } @@ -433,7 +432,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, int i; int64_t rate_sum = 0; int64_t dist_sum = 0; - const int ref = xd->mi_8x8[0]->mbmi.ref_frame[0]; + const int ref = xd->mi[0]->mbmi.ref_frame[0]; unsigned int sse; for (i = 0; i < MAX_MB_PLANE; ++i) { @@ -557,7 +556,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x, const int16_t *scan, const int16_t *nb, int use_fast_coef_costing) { MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const struct macroblock_plane *p = &x->plane[plane]; const struct macroblockd_plane *pd = &xd->plane[plane]; const PLANE_TYPE type = pd->plane_type; @@ -566,7 +565,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x, const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = x->token_costs[tx_size][type][is_inter_block(mbmi)]; - uint8_t *p_tok = x->token_cache; + uint8_t token_cache[32 * 32]; int pt = combine_entropy_contexts(*A, *L); int c, cost; // Check for consistency of tx_size with mode info @@ -584,7 +583,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x, int v = qcoeff[0]; int prev_t = vp9_dct_value_tokens_ptr[v].token; cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v]; - p_tok[0] = vp9_pt_energy_class[prev_t]; + token_cache[0] = vp9_pt_energy_class[prev_t]; ++token_costs; // ac tokens @@ -597,9 +596,9 @@ static INLINE int cost_coeffs(MACROBLOCK *x, if (use_fast_coef_costing) { cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v]; } else { - pt = get_coef_context(nb, 
p_tok, c); + pt = get_coef_context(nb, token_cache, c); cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v]; - p_tok[rc] = vp9_pt_energy_class[t]; + token_cache[rc] = vp9_pt_energy_class[t]; } prev_t = t; if (!--band_left) { @@ -613,7 +612,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x, if (use_fast_coef_costing) { cost += (*token_costs)[0][!prev_t][EOB_TOKEN]; } else { - pt = get_coef_context(nb, p_tok, c); + pt = get_coef_context(nb, token_cache, c); cost += (*token_costs)[0][pt][EOB_TOKEN]; } } @@ -639,7 +638,7 @@ static void dist_block(int plane, int block, TX_SIZE tx_size, &this_sse) >> shift; args->sse = this_sse >> shift; - if (x->skip_encode && !is_inter_block(&xd->mi_8x8[0]->mbmi)) { + if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) { // TODO(jingning): tune the model to better capture the distortion. int64_t p = (pd->dequant[1] * pd->dequant[1] * (1 << ss_txfrm_size)) >> (shift + 2); @@ -664,7 +663,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, struct rdcost_block_args *args = arg; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; int64_t rd1, rd2, rd; if (args->skip) @@ -750,7 +749,7 @@ static void txfm_rd_in_plane(MACROBLOCK *x, args.use_fast_coef_costing = use_fast_coef_casting; if (plane == 0) - xd->mi_8x8[0]->mbmi.tx_size = tx_size; + xd->mi[0]->mbmi.tx_size = tx_size; vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); @@ -780,7 +779,7 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x, VP9_COMMON *const cm = &cpi->common; const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; mbmi->tx_size = MIN(max_tx_size, largest_tx_size); @@ -799,7 +798,7 @@ static void choose_txfm_size_from_rd(VP9_COMP 
*cpi, MACROBLOCK *x, const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}, @@ -882,7 +881,7 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}, @@ -952,7 +951,7 @@ static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int r[TX_SIZES][2], s[TX_SIZES]; int64_t d[TX_SIZES], sse[TX_SIZES]; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const TX_SIZE max_tx_size = max_txsize_lookup[bs]; TX_SIZE tx_size; @@ -995,7 +994,7 @@ static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t ref_best_rd) { int64_t sse[TX_SIZES]; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; assert(bs == mbmi->sb_type); if (cpi->sf.tx_size_search_method != USE_FULL_RD) { @@ -1071,7 +1070,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, vpx_memcpy(ta, a, sizeof(ta)); vpx_memcpy(tl, l, sizeof(tl)); - xd->mi_8x8[0]->mbmi.tx_size = TX_4X4; + xd->mi[0]->mbmi.tx_size = TX_4X4; for (mode = DC_PRED; mode <= TM_PRED; ++mode) { int64_t this_rd; @@ -1100,7 +1099,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block, p->src_diff); int16_t 
*const coeff = BLOCK_OFFSET(x->plane[0].coeff, block); - xd->mi_8x8[0]->bmi[block].as_mode = mode; + xd->mi[0]->bmi[block].as_mode = mode; vp9_predict_intra_block(xd, block, 1, TX_4X4, mode, x->skip_encode ? src : dst, @@ -1173,10 +1172,10 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb, int64_t best_rd) { int i, j; const MACROBLOCKD *const xd = &mb->e_mbd; - MODE_INFO *const mic = xd->mi_8x8[0]; - const MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride]; - const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL; - const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type; + MODE_INFO *const mic = xd->mi[0]; + const MODE_INFO *above_mi = xd->mi[-xd->mi_stride]; + const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL; + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int idx, idy; @@ -1243,7 +1242,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, MB_PREDICTION_MODE mode; MB_PREDICTION_MODE mode_selected = DC_PRED; MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *const mic = xd->mi_8x8[0]; + MODE_INFO *const mic = xd->mi[0]; int this_rate, this_rate_tokenonly, s; int64_t this_distortion, this_rd; TX_SIZE best_tx = TX_4X4; @@ -1257,8 +1256,8 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, /* Y Search for intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; mode++) { int64_t local_tx_cache[TX_MODES]; - MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride]; - MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL; + MODE_INFO *above_mi = xd->mi[-xd->mi_stride]; + MODE_INFO *left_mi = xd->left_available ? 
xd->mi[-1] : NULL; if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode))) continue; @@ -1312,7 +1311,7 @@ static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, int64_t *sse, BLOCK_SIZE bsize, int64_t ref_best_rd) { MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi); int plane; int pnrate = 0, pnskip = 1; @@ -1369,7 +1368,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue; - xd->mi_8x8[0]->mbmi.uv_mode = mode; + xd->mi[0]->mbmi.uv_mode = mode; super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, &this_sse, bsize, best_rd); @@ -1410,7 +1409,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, } } - xd->mi_8x8[0]->mbmi.uv_mode = mode_selected; + xd->mi[0]->mbmi.uv_mode = mode_selected; return best_rd; } @@ -1421,7 +1420,7 @@ static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, const VP9_COMMON *cm = &cpi->common; int64_t unused; - x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED; + x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED; super_block_uvrd(cpi, x, rate_tokenonly, distortion, skippable, &unused, bsize, INT64_MAX); *rate = *rate_tokenonly + x->intra_uv_mode_cost[cm->frame_type][DC_PRED]; @@ -1447,13 +1446,13 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, bsize < BLOCK_8X8 ? 
BLOCK_8X8 : bsize, max_tx_size); } - *mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode; + *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode; } -static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode, +static int cost_mv_ref(const VP9_COMP *cpi, MB_PREDICTION_MODE mode, int mode_context) { - MACROBLOCK *const x = &cpi->mb; - const int segment_id = x->e_mbd.mi_8x8[0]->mbmi.segment_id; + const MACROBLOCK *const x = &cpi->mb; + const int segment_id = x->e_mbd.mi[0]->mbmi.segment_id; // Don't account for mode here if segment skip is enabled. if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) { @@ -1478,7 +1477,7 @@ static int labels2mode(VP9_COMP *cpi, MACROBLOCKD *xd, int i, int_mv seg_mvs[MAX_REF_FRAMES], int_mv *best_ref_mv[2], const int *mvjcost, int *mvcost[2]) { - MODE_INFO *const mic = xd->mi_8x8[0]; + MODE_INFO *const mic = xd->mi[0]; const MB_MODE_INFO *const mbmi = &mic->mbmi; int thismvcost = 0; int idx, idy; @@ -1546,7 +1545,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, MACROBLOCKD *xd = &x->e_mbd; struct macroblockd_plane *const pd = &xd->plane[0]; struct macroblock_plane *const p = &x->plane[0]; - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd); const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize]; @@ -1560,6 +1559,8 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, int thisrate = 0, ref; const scan_order *so = &vp9_default_scan_orders[TX_4X4]; const int is_compound = has_second_ref(&mi->mbmi); + const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter); + for (ref = 0; ref < 1 + is_compound; ++ref) { const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i, pd->pre[ref].stride)]; @@ -1567,7 +1568,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, 
&xd->block_refs[ref]->sf, width, height, ref, - xd->interp_kernel, MV_PRECISION_Q3, + kernel, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2)); } @@ -1643,7 +1644,7 @@ static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) { } static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { - MB_MODE_INFO *const mbmi = &x->e_mbd.mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &x->e_mbd.plane[0]; @@ -1658,7 +1659,7 @@ static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src, struct buf_2d orig_pre[2]) { - MB_MODE_INFO *mbmi = &x->e_mbd.mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi; x->plane[0].src = orig_src; x->e_mbd.plane[0].pre[0] = orig_pre[0]; if (has_second_ref(mbmi)) @@ -1669,6 +1670,45 @@ static INLINE int mv_has_subpel(const MV *mv) { return (mv->row & 0x0F) || (mv->col & 0x0F); } +// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion. 
+// TODO(aconverse): Find out if this is still productive then clean up or remove +static int check_best_zero_mv( + const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES], + int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], + int disable_inter_mode_mask, int this_mode, int ref_frame, + int second_ref_frame) { + if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) && + (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) && + frame_mv[this_mode][ref_frame].as_int == 0 && + (second_ref_frame == NONE || + frame_mv[this_mode][second_ref_frame].as_int == 0)) { + int rfc = mode_context[ref_frame]; + int c1 = cost_mv_ref(cpi, NEARMV, rfc); + int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); + int c3 = cost_mv_ref(cpi, ZEROMV, rfc); + + if (this_mode == NEARMV) { + if (c1 > c3) return 0; + } else if (this_mode == NEARESTMV) { + if (c2 > c3) return 0; + } else { + assert(this_mode == ZEROMV); + if (second_ref_frame == NONE) { + if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0) || + (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0)) + return 0; + } else { + if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0 && + frame_mv[NEARESTMV][second_ref_frame].as_int == 0) || + (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0 && + frame_mv[NEARMV][second_ref_frame].as_int == 0)) + return 0; + } + } + } + return 1; +} + static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, BEST_SEG_INFO *bsi_buf, int filter_idx, @@ -1679,7 +1719,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, MB_PREDICTION_MODE this_mode; MACROBLOCKD *xd = &x->e_mbd; VP9_COMMON *cm = &cpi->common; - MODE_INFO *mi = xd->mi_8x8[0]; + MODE_INFO *mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; @@ -1737,43 +1777,11 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, if 
(disable_inter_mode_mask & (1 << mode_idx)) continue; - // if we're near/nearest and mv == 0,0, compare to zeromv - if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) && - (this_mode == NEARMV || this_mode == NEARESTMV || - this_mode == ZEROMV) && - frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 && - (!has_second_rf || - frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) { - int rfc = mbmi->mode_context[mbmi->ref_frame[0]]; - int c1 = cost_mv_ref(cpi, NEARMV, rfc); - int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); - int c3 = cost_mv_ref(cpi, ZEROMV, rfc); - - if (this_mode == NEARMV) { - if (c1 > c3) - continue; - } else if (this_mode == NEARESTMV) { - if (c2 > c3) - continue; - } else { - assert(this_mode == ZEROMV); - if (!has_second_rf) { - if ((c3 >= c2 && - frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0)) - continue; - } else { - if ((c3 >= c2 && - frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 && - frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 && - frame_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0)) - continue; - } - } - } + if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, + disable_inter_mode_mask, + this_mode, mbmi->ref_frame[0], + mbmi->ref_frame[1])) + continue; vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre)); vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above, @@ -2090,7 +2098,7 @@ static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, int i; BEST_SEG_INFO *bsi = bsi_buf + filter_idx; MACROBLOCKD *xd = &x->e_mbd; - MODE_INFO *mi = xd->mi_8x8[0]; + MODE_INFO *mi = xd->mi[0]; MB_MODE_INFO *mbmi = &mi->mbmi; int mode_idx; @@ -2137,7 +2145,7 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame, BLOCK_SIZE block_size ) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = 
&xd->mi[0]->mbmi; int_mv this_mv; int i; int zero_seen = 0; @@ -2267,7 +2275,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, // restored if we decide to encode this way ctx->skip = x->skip; ctx->best_mode_index = mode_index; - ctx->mic = *xd->mi_8x8[0]; + ctx->mic = *xd->mi[0]; ctx->best_ref_mv[0].as_int = ref_mv->as_int; ctx->best_ref_mv[1].as_int = second_ref_mv->as_int; @@ -2318,7 +2326,7 @@ void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, const VP9_COMMON *cm = &cpi->common; const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame]; const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; @@ -2350,9 +2358,9 @@ const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL; } -static INLINE int get_switchable_rate(const MACROBLOCK *x) { +int vp9_get_switchable_rate(const MACROBLOCK *x) { const MACROBLOCKD *const xd = &x->e_mbd; - const MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int ctx = vp9_get_pred_context_switchable_interp(xd); return SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[ctx][mbmi->interp_filter]; @@ -2365,7 +2373,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int_mv *tmp_mv, int *rate_mv) { MACROBLOCKD *xd = &x->e_mbd; VP9_COMMON *cm = &cpi->common; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; int bestsme = INT_MAX; int further_steps, step_param; @@ -2531,13 +2539,14 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, const int pw = 4 * num_4x4_blocks_wide_lookup[bsize]; const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; MACROBLOCKD *xd = 
&x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] }; int_mv ref_mv[2]; int ite, ref; // Prediction buffer from second frame. uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t)); + const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter); // Do joint motion search in compound mode to get more accurate mv. struct buf_2d backup_yv12[2][MAX_MB_PLANE]; @@ -2591,7 +2600,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, &frame_mv[refs[!id]].as_mv, &xd->block_refs[!id]->sf, pw, ph, 0, - xd->interp_kernel, MV_PRECISION_Q3, + kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE); // Compound motion search on first ref frame. @@ -2692,7 +2701,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, const int64_t ref_best_rd) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const int is_comp_pred = has_second_ref(mbmi); const int num_refs = is_comp_pred ? 
2 : 1; const int this_mode = mbmi->mode; @@ -2744,7 +2753,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, return INT64_MAX; *rate2 += rate_mv; frame_mv[refs[0]].as_int = - xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int; + xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int; single_newmv[refs[0]].as_int = tmp_mv.as_int; } } @@ -2806,8 +2815,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int j; int64_t rs_rd; mbmi->interp_filter = i; - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); - rs = get_switchable_rate(x); + rs = vp9_get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); if (i > 0 && intpel_mv) { @@ -2877,8 +2885,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Set the appropriate filter mbmi->interp_filter = cm->interp_filter != SWITCHABLE ? cm->interp_filter : *best_filter; - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); - rs = cm->interp_filter == SWITCHABLE ? get_switchable_rate(x) : 0; + rs = cm->interp_filter == SWITCHABLE ? 
vp9_get_switchable_rate(x) : 0; if (pred_exists) { if (best_needs_copy) { @@ -2908,7 +2915,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (cm->interp_filter == SWITCHABLE) - *rate2 += get_switchable_rate(x); + *rate2 += vp9_get_switchable_rate(x); if (!is_comp_pred) { if (!x->in_active_map) { @@ -3066,7 +3073,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, TX_SIZE max_uv_tx_size; x->skip_encode = 0; ctx->skip = 0; - xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME; + xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME; if (bsize >= BLOCK_8X8) { if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, @@ -3075,7 +3082,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = INT_MAX; return; } - max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize); + max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip, bsize, max_uv_tx_size); } else { @@ -3085,7 +3092,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = INT_MAX; return; } - max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize); + max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size); } @@ -3108,7 +3115,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } - ctx->mic = *xd->mi_8x8[0]; + ctx->mic = *xd->mi[0]; } int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, @@ -3121,9 +3128,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_rd_so_far) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const struct segmentation *const seg = &cm->seg; - const BLOCK_SIZE block_size = 
get_plane_block_size(bsize, &xd->plane[0]); MB_PREDICTION_MODE this_mode; MV_REFERENCE_FRAME ref_frame, second_ref_frame; unsigned char segment_id = mbmi->segment_id; @@ -3189,7 +3195,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, x->pred_mv_sad[ref_frame] = INT_MAX; if (cpi->ref_frame_flags & flag_list[ref_frame]) { vp9_setup_buffer_inter(cpi, x, tile, - ref_frame, block_size, mi_row, mi_col, + ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; @@ -3371,46 +3377,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } } else { - // TODO(aconverse): Find out if this is still productive then clean up or - // remove - // if we're near/nearest and mv == 0,0, compare to zeromv if (x->in_active_map && - !(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) && - (this_mode == NEARMV || this_mode == NEARESTMV || - this_mode == ZEROMV) && - frame_mv[this_mode][ref_frame].as_int == 0 && - !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) && - (!comp_pred || frame_mv[this_mode][second_ref_frame].as_int == 0)) { - int rfc = mbmi->mode_context[ref_frame]; - int c1 = cost_mv_ref(cpi, NEARMV, rfc); - int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); - int c3 = cost_mv_ref(cpi, ZEROMV, rfc); - - if (this_mode == NEARMV) { - if (c1 > c3) - continue; - } else if (this_mode == NEARESTMV) { - if (c2 > c3) - continue; - } else { - assert(this_mode == ZEROMV); - if (!comp_pred) { - if ((c3 >= c2 && - frame_mv[NEARESTMV][ref_frame].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][ref_frame].as_int == 0)) - continue; - } else { - if ((c3 >= c2 && - frame_mv[NEARESTMV][ref_frame].as_int == 0 && - frame_mv[NEARESTMV][second_ref_frame].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][ref_frame].as_int == 0 && - frame_mv[NEARMV][second_ref_frame].as_int == 0)) - continue; - } - } - } + !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) + if 
(!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, + disable_inter_mode_mask, this_mode, ref_frame, + second_ref_frame)) + continue; } mbmi->mode = this_mode; @@ -3423,7 +3395,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, : cm->interp_filter; x->skip = 0; set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); // Select prediction reference frames. for (i = 0; i < MAX_MB_PLANE; i++) { @@ -3788,9 +3759,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_rd_so_far) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const struct segmentation *seg = &cm->seg; - const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); MV_REFERENCE_FRAME ref_frame, second_ref_frame; unsigned char segment_id = mbmi->segment_id; int comp_pred, i; @@ -3850,7 +3820,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { vp9_setup_buffer_inter(cpi, x, tile, - ref_frame, block_size, mi_row, mi_col, + ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } @@ -3968,7 +3938,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // them for this frame. mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter; - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (comp_pred) { if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) @@ -4067,7 +4036,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR]; this_rd_thresh = (ref_frame == GOLDEN_FRAME) ? 
cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh; - xd->mi_8x8[0]->mbmi.tx_size = TX_4X4; + xd->mi[0]->mbmi.tx_size = TX_4X4; cpi->mask_filter_rd = 0; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) @@ -4091,7 +4060,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int newbest, rs; int64_t rs_rd; mbmi->interp_filter = switchable_filter_index; - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile, &mbmi->ref_mvs[ref_frame][0], second_ref, @@ -4104,7 +4072,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (tmp_rd == INT64_MAX) continue; - rs = get_switchable_rate(x); + rs = vp9_get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); cpi->rd_filter_cache[switchable_filter_index] = tmp_rd; cpi->rd_filter_cache[SWITCHABLE_FILTERS] = @@ -4131,7 +4099,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, tmp_best_skippable = skippable; tmp_best_mbmode = *mbmi; for (i = 0; i < 4; i++) { - tmp_best_bmodes[i] = xd->mi_8x8[0]->bmi[i]; + tmp_best_bmodes[i] = xd->mi[0]->bmi[i]; x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i]; } pred_exists = 1; @@ -4156,7 +4124,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ? 
tmp_best_filter : cm->interp_filter); - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (!pred_exists) { // Handles the special case when a filter that is not in the // switchable list (bilinear, 6-tap) is indicated at the frame level @@ -4179,14 +4146,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, skippable = tmp_best_skippable; *mbmi = tmp_best_mbmode; for (i = 0; i < 4; i++) - xd->mi_8x8[0]->bmi[i] = tmp_best_bmodes[i]; + xd->mi[0]->bmi[i] = tmp_best_bmodes[i]; } rate2 += rate; distortion2 += distortion; if (cm->interp_filter == SWITCHABLE) - rate2 += get_switchable_rate(x); + rate2 += vp9_get_switchable_rate(x); if (!mode_excluded) mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE @@ -4263,8 +4230,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } // Keep record of best inter rd with single reference - if (is_inter_block(&xd->mi_8x8[0]->mbmi) && - !has_second_ref(&xd->mi_8x8[0]->mbmi) && + if (is_inter_block(&xd->mi[0]->mbmi) && + !has_second_ref(&xd->mi[0]->mbmi) && !mode_excluded && this_rd < best_inter_rd) { best_inter_rd = this_rd; @@ -4304,7 +4271,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, sizeof(uint8_t) * ctx->num_4x4_blk); for (i = 0; i < 4; i++) - best_bmodes[i] = xd->mi_8x8[0]->bmi[i]; + best_bmodes[i] = xd->mi[0]->bmi[i]; // TODO(debargha): enhance this test with a better distortion prediction // based on qp, activity mask and history @@ -4453,13 +4420,13 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, x->skip |= best_skip2; if (!is_inter_block(&best_mbmode)) { for (i = 0; i < 4; i++) - xd->mi_8x8[0]->bmi[i].as_mode = best_bmodes[i].as_mode; + xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode; } else { for (i = 0; i < 4; ++i) - vpx_memcpy(&xd->mi_8x8[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info)); + vpx_memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info)); - mbmi->mv[0].as_int = 
xd->mi_8x8[0]->bmi[3].as_mv[0].as_int; - mbmi->mv[1].as_int = xd->mi_8x8[0]->bmi[3].as_mv[1].as_int; + mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int; + mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int; } for (i = 0; i < REFERENCE_MODES; ++i) { diff --git a/libvpx/vp9/encoder/vp9_rdopt.h b/libvpx/vp9/encoder/vp9_rdopt.h index 6968fa6..a01dbd4 100644 --- a/libvpx/vp9/encoder/vp9_rdopt.h +++ b/libvpx/vp9/encoder/vp9_rdopt.h @@ -40,6 +40,8 @@ void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, unsigned int qstep, int *rate, int64_t *dist); +int vp9_get_switchable_rate(const MACROBLOCK *x); + void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, MV_REFERENCE_FRAME ref_frame, diff --git a/libvpx/vp9/encoder/vp9_sad.c b/libvpx/vp9/encoder/vp9_sad.c index 58c5df4..9d8da0d 100644 --- a/libvpx/vp9/encoder/vp9_sad.c +++ b/libvpx/vp9/encoder/vp9_sad.c @@ -44,7 +44,7 @@ unsigned int vp9_sad##m##x##n##_avg_c(const uint8_t *src_ptr, int src_stride, \ const uint8_t *second_pred, \ unsigned int max_sad) { \ uint8_t comp_pred[m * n]; \ - comp_avg_pred(comp_pred, second_pred, m, n, ref_ptr, ref_stride); \ + vp9_comp_avg_pred(comp_pred, second_pred, m, n, ref_ptr, ref_stride); \ return sad(src_ptr, src_stride, comp_pred, m, m, n); \ } diff --git a/libvpx/vp9/encoder/vp9_segmentation.c b/libvpx/vp9/encoder/vp9_segmentation.c index fd8fa53..9d3e6dc 100644 --- a/libvpx/vp9/encoder/vp9_segmentation.c +++ b/libvpx/vp9/encoder/vp9_segmentation.c @@ -133,8 +133,8 @@ static void count_segs(VP9_COMP *cpi, const TileInfo *const tile, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - xd->mi_8x8 = mi_8x8; - segment_id = xd->mi_8x8[0]->mbmi.segment_id; + xd->mi = mi_8x8; + segment_id = xd->mi[0]->mbmi.segment_id; set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); @@ -152,7 +152,7 @@ static void count_segs(VP9_COMP *cpi, const TileInfo *const tile, // Store the prediction status for this mb and 
update counts // as appropriate - xd->mi_8x8[0]->mbmi.seg_id_predicted = pred_flag; + xd->mi[0]->mbmi.seg_id_predicted = pred_flag; temporal_predictor_count[pred_context][pred_flag]++; if (!pred_flag) @@ -169,7 +169,7 @@ static void count_segs_sb(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col, BLOCK_SIZE bsize) { const VP9_COMMON *const cm = &cpi->common; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; int bw, bh; const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2; @@ -229,7 +229,7 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { vp9_prob t_pred_tree[SEG_TREE_PROBS]; vp9_prob t_nopred_prob[PREDICTION_PROBS]; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; MODE_INFO **mi_ptr, **mi; // Set default state for the segment tree probabilities and the diff --git a/libvpx/vp9/encoder/vp9_speed_features.c b/libvpx/vp9/encoder/vp9_speed_features.c new file mode 100644 index 0000000..d6b6174 --- /dev/null +++ b/libvpx/vp9/encoder/vp9_speed_features.c @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <limits.h> + +#include "vp9/encoder/vp9_onyx_int.h" +#include "vp9/encoder/vp9_speed_features.h" + +#define ALL_INTRA_MODES ((1 << DC_PRED) | \ + (1 << V_PRED) | (1 << H_PRED) | \ + (1 << D45_PRED) | (1 << D135_PRED) | \ + (1 << D117_PRED) | (1 << D153_PRED) | \ + (1 << D207_PRED) | (1 << D63_PRED) | \ + (1 << TM_PRED)) +#define INTRA_DC_ONLY (1 << DC_PRED) +#define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED)) +#define INTRA_DC_H_V ((1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED)) +#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED)) + +// Masks for partially or completely disabling split mode +#define DISABLE_ALL_INTER_SPLIT ((1 << THR_COMP_GA) | \ + (1 << THR_COMP_LA) | \ + (1 << THR_ALTR) | \ + (1 << THR_GOLD) | \ + (1 << THR_LAST)) + +#define DISABLE_ALL_SPLIT ((1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT) + +#define DISABLE_COMPOUND_SPLIT ((1 << THR_COMP_GA) | (1 << THR_COMP_LA)) + +#define LAST_AND_INTRA_SPLIT_ONLY ((1 << THR_COMP_GA) | \ + (1 << THR_COMP_LA) | \ + (1 << THR_ALTR) | \ + (1 << THR_GOLD)) + +static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, + SPEED_FEATURES *sf, int speed) { + sf->adaptive_rd_thresh = 1; + sf->recode_loop = (speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW; + sf->allow_skip_recode = 1; + + if (speed >= 1) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = vp9_frame_is_boosted(cpi) ? USE_FULL_RD + : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? 
DISABLE_ALL_SPLIT + : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->auto_mv_step_size = 1; + sf->adaptive_rd_thresh = 2; + sf->subpel_iters_per_step = 1; + sf->mode_skip_start = 10; + sf->adaptive_pred_interp_filter = 1; + + sf->recode_loop = ALLOW_RECODE_KFARFGF; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + } + + if (speed >= 2) { + sf->tx_size_search_method = vp9_frame_is_boosted(cpi) ? USE_FULL_RD + : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT + : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; + + sf->adaptive_pred_interp_filter = 2; + sf->reference_masking = 1; + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | + FLAG_SKIP_INTRA_BESTINTER | + FLAG_SKIP_COMP_BESTINTRA | + FLAG_SKIP_INTRA_LOWVAR; + sf->disable_filter_search_var_thresh = 100; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; + sf->adjust_partitioning_from_last_frame = 1; + sf->last_partitioning_redo_frequency = 3; + } + + if (speed >= 3) { + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = DISABLE_ALL_SPLIT; + else + sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; + + sf->recode_loop = ALLOW_RECODE_KFMAXBW; + sf->adaptive_rd_thresh = 3; + sf->mode_skip_start = 6; + sf->use_fast_coef_updates = ONE_LOOP_REDUCED; + sf->use_fast_coef_costing = 1; + } + + if (speed >= 4) { + sf->use_square_partition_only = 1; + sf->tx_size_search_method = USE_LARGESTALL; + sf->disable_split_mask = DISABLE_ALL_SPLIT; + sf->adaptive_rd_thresh = 4; + sf->mode_search_skip_flags |= 
FLAG_SKIP_COMP_REFMISMATCH | + FLAG_EARLY_TERMINATE; + sf->disable_filter_search_var_thresh = 200; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; + sf->use_lp32x32fdct = 1; + } + + if (speed >= 5) { + int i; + + sf->partition_search_type = FIXED_PARTITION; + sf->optimize_coefficients = 0; + sf->search_method = HEX; + sf->disable_filter_search_var_thresh = 500; + for (i = 0; i < TX_SIZES; ++i) { + sf->intra_y_mode_mask[i] = INTRA_DC_ONLY; + sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; + } + cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; + } +} + +static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, + int speed) { + sf->static_segmentation = 0; + sf->adaptive_rd_thresh = 1; + sf->encode_breakout_thresh = 1; + sf->use_fast_coef_costing = 1; + + if (speed == 1) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD + : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT + : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; + + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->adaptive_pred_interp_filter = 1; + sf->auto_mv_step_size = 1; + sf->adaptive_rd_thresh = 2; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->encode_breakout_thresh = 8; + } + + if (speed >= 2) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD + : USE_LARGESTALL; + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? 
+ DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; + + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | + FLAG_SKIP_INTRA_BESTINTER | + FLAG_SKIP_COMP_BESTINTRA | + FLAG_SKIP_INTRA_LOWVAR; + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->adaptive_pred_interp_filter = 2; + sf->auto_mv_step_size = 1; + sf->reference_masking = 1; + + sf->disable_filter_search_var_thresh = 50; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; + sf->adjust_partitioning_from_last_frame = 1; + sf->last_partitioning_redo_frequency = 3; + + sf->adaptive_rd_thresh = 2; + sf->use_lp32x32fdct = 1; + sf->mode_skip_start = 11; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->encode_breakout_thresh = 200; + } + + if (speed >= 3) { + sf->use_square_partition_only = 1; + sf->disable_filter_search_var_thresh = 100; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; + sf->constrain_copy_partition = 1; + sf->use_uv_intra_rd_estimate = 1; + sf->skip_encode_sb = 1; + sf->subpel_iters_per_step = 1; + sf->use_fast_coef_updates = ONE_LOOP_REDUCED; + sf->adaptive_rd_thresh = 4; + sf->mode_skip_start = 6; + sf->allow_skip_recode = 0; + sf->optimize_coefficients = 0; + sf->disable_split_mask = DISABLE_ALL_SPLIT; + sf->lpf_pick = LPF_PICK_FROM_Q; + sf->encode_breakout_thresh = 700; + } + + if (speed >= 4) { + int i; + sf->last_partitioning_redo_frequency = 4; + sf->adaptive_rd_thresh = 5; + sf->use_fast_coef_costing = 0; + sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX; + sf->adjust_partitioning_from_last_frame = + cm->last_frame_type != cm->frame_type || (0 == + (cm->current_video_frame + 1) % 
sf->last_partitioning_redo_frequency); + sf->subpel_force_stop = 1; + for (i = 0; i < TX_SIZES; i++) { + sf->intra_y_mode_mask[i] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; + } + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY; + sf->frame_parameter_update = 0; + sf->encode_breakout_thresh = 1000; + sf->search_method = FAST_HEX; + sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV); + sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->disable_inter_mode_mask[BLOCK_64X32] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->disable_inter_mode_mask[BLOCK_64X64] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->max_intra_bsize = BLOCK_32X32; + sf->allow_skip_recode = 1; + } + + if (speed >= 5) { + sf->max_partition_size = BLOCK_32X32; + sf->min_partition_size = BLOCK_8X8; + sf->partition_check = + (cm->current_video_frame % sf->last_partitioning_redo_frequency == 1); + sf->force_frame_boost = cm->frame_type == KEY_FRAME || + (cm->current_video_frame % + (sf->last_partitioning_redo_frequency << 1) == 1); + sf->max_delta_qindex = (cm->frame_type == KEY_FRAME) ? 20 : 15; + sf->partition_search_type = REFERENCE_PARTITION; + sf->use_nonrd_pick_mode = 1; + sf->search_method = FAST_DIAMOND; + sf->allow_skip_recode = 0; + } + + if (speed >= 6) { + // Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION. + sf->partition_search_type = SOURCE_VAR_BASED_PARTITION; + sf->search_type_check_frequency = 50; + sf->source_var_thresh = 360; + + sf->use_nonrd_pick_mode = 1; + sf->search_method = FAST_DIAMOND; + } + + if (speed >= 7) { + int i; + for (i = 0; i < BLOCK_SIZES; ++i) + sf->disable_inter_mode_mask[i] = ~(1 << INTER_OFFSET(NEARESTMV)); + } +} + +void vp9_set_speed_features(VP9_COMP *cpi) { + SPEED_FEATURES *const sf = &cpi->sf; + VP9_COMMON *const cm = &cpi->common; + const VP9_CONFIG *const oxcf = &cpi->oxcf; + const int speed = cpi->speed < 0 ? 
-cpi->speed : cpi->speed; + int i; + + // best quality defaults + sf->frame_parameter_update = 1; + sf->search_method = NSTEP; + sf->recode_loop = ALLOW_RECODE; + sf->subpel_search_method = SUBPEL_TREE; + sf->subpel_iters_per_step = 2; + sf->subpel_force_stop = 0; + sf->optimize_coefficients = !oxcf->lossless; + sf->reduce_first_step_size = 0; + sf->auto_mv_step_size = 0; + sf->max_step_search_steps = MAX_MVSEARCH_STEPS; + sf->comp_inter_joint_search_thresh = BLOCK_4X4; + sf->adaptive_rd_thresh = 0; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF; + sf->tx_size_search_method = USE_FULL_RD; + sf->use_lp32x32fdct = 0; + sf->adaptive_motion_search = 0; + sf->adaptive_pred_interp_filter = 0; + sf->reference_masking = 0; + sf->partition_search_type = SEARCH_PARTITION; + sf->less_rectangular_check = 0; + sf->use_square_partition_only = 0; + sf->auto_min_max_partition_size = NOT_IN_USE; + sf->max_partition_size = BLOCK_64X64; + sf->min_partition_size = BLOCK_4X4; + sf->adjust_partitioning_from_last_frame = 0; + sf->last_partitioning_redo_frequency = 4; + sf->constrain_copy_partition = 0; + sf->disable_split_mask = 0; + sf->mode_search_skip_flags = 0; + sf->force_frame_boost = 0; + sf->max_delta_qindex = 0; + sf->disable_split_var_thresh = 0; + sf->disable_filter_search_var_thresh = 0; + for (i = 0; i < TX_SIZES; i++) { + sf->intra_y_mode_mask[i] = ALL_INTRA_MODES; + sf->intra_uv_mode_mask[i] = ALL_INTRA_MODES; + } + sf->use_rd_breakout = 0; + sf->skip_encode_sb = 0; + sf->use_uv_intra_rd_estimate = 0; + sf->allow_skip_recode = 0; + sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE; + sf->use_fast_coef_updates = TWO_LOOP; + sf->use_fast_coef_costing = 0; + sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set + sf->use_nonrd_pick_mode = 0; + sf->encode_breakout_thresh = 0; + for (i = 0; i < BLOCK_SIZES; ++i) + sf->disable_inter_mode_mask[i] = 0; + sf->max_intra_bsize = BLOCK_64X64; + // This setting only takes effect when partition_search_type is set 
+ // to FIXED_PARTITION. + sf->always_this_block_size = BLOCK_16X16; + sf->search_type_check_frequency = 50; + sf->source_var_thresh = 100; + + // Recode loop tolerance %. + sf->recode_tolerance = 25; + + switch (oxcf->mode) { + case MODE_BESTQUALITY: + case MODE_SECONDPASS_BEST: // This is the best quality mode. + cpi->diamond_search_sad = vp9_full_range_search; + break; + case MODE_FIRSTPASS: + case MODE_GOODQUALITY: + case MODE_SECONDPASS: + set_good_speed_feature(cpi, cm, sf, speed); + break; + case MODE_REALTIME: + set_rt_speed_feature(cm, sf, speed); + break; + } + + // Slow quant, dct and trellis not worthwhile for first pass + // so make sure they are always turned off. + if (cpi->pass == 1) + sf->optimize_coefficients = 0; + + // No recode for 1 pass. + if (cpi->pass == 0) { + sf->recode_loop = DISALLOW_RECODE; + sf->optimize_coefficients = 0; + } + + if (sf->subpel_search_method == SUBPEL_TREE) { + cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree; + cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_tree; + } + + cpi->mb.optimize = sf->optimize_coefficients == 1 && cpi->pass != 1; + + if (cpi->encode_breakout && oxcf->mode == MODE_REALTIME && + sf->encode_breakout_thresh > cpi->encode_breakout) + cpi->encode_breakout = sf->encode_breakout_thresh; + + if (sf->disable_split_mask == DISABLE_ALL_SPLIT) + sf->adaptive_pred_interp_filter = 0; + + if (!cpi->oxcf.frame_periodic_boost) { + sf->max_delta_qindex = 0; + } +} diff --git a/libvpx/vp9/encoder/vp9_speed_features.h b/libvpx/vp9/encoder/vp9_speed_features.h new file mode 100644 index 0000000..72f548a --- /dev/null +++ b/libvpx/vp9/encoder/vp9_speed_features.h @@ -0,0 +1,359 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_ENCODER_VP9_SPEED_FEATURES_H_ +#define VP9_ENCODER_VP9_SPEED_FEATURES_H_ + +#include "vp9/common/vp9_enums.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + DIAMOND = 0, + NSTEP = 1, + HEX = 2, + BIGDIA = 3, + SQUARE = 4, + FAST_HEX = 5, + FAST_DIAMOND = 6 +} SEARCH_METHODS; + +typedef enum { + // No recode. + DISALLOW_RECODE = 0, + // Allow recode for KF and exceeding maximum frame bandwidth. + ALLOW_RECODE_KFMAXBW = 1, + // Allow recode only for KF/ARF/GF frames. + ALLOW_RECODE_KFARFGF = 2, + // Allow recode for all frames based on bitrate constraints. + ALLOW_RECODE = 3, +} RECODE_LOOP_TYPE; + +typedef enum { + SUBPEL_TREE = 0, + // Other methods to come +} SUBPEL_SEARCH_METHODS; + +typedef enum { + LAST_FRAME_PARTITION_OFF = 0, + LAST_FRAME_PARTITION_LOW_MOTION = 1, + LAST_FRAME_PARTITION_ALL = 2 +} LAST_FRAME_PARTITION_METHOD; + +typedef enum { + USE_FULL_RD = 0, + USE_LARGESTINTRA, + USE_LARGESTINTRA_MODELINTER, + USE_LARGESTALL +} TX_SIZE_SEARCH_METHOD; + +typedef enum { + NOT_IN_USE = 0, + RELAXED_NEIGHBORING_MIN_MAX = 1, + STRICT_NEIGHBORING_MIN_MAX = 2 +} AUTO_MIN_MAX_MODE; + +typedef enum { + // Try the full image with different values. + LPF_PICK_FROM_FULL_IMAGE, + // Try a small portion of the image with different values. + LPF_PICK_FROM_SUBIMAGE, + // Estimate the level based on quantizer and frame type + LPF_PICK_FROM_Q, +} LPF_PICK_METHOD; + +typedef enum { + // Terminate search early based on distortion so far compared to + // qp step, distortion in the neighborhood of the frame, etc. + FLAG_EARLY_TERMINATE = 1 << 0, + + // Skips comp inter modes if the best so far is an intra mode. + FLAG_SKIP_COMP_BESTINTRA = 1 << 1, + + // Skips comp inter modes if the best single intermode so far does + // not have the same reference as one of the two references being + // tested. 
+ FLAG_SKIP_COMP_REFMISMATCH = 1 << 2, + + // Skips oblique intra modes if the best so far is an inter mode. + FLAG_SKIP_INTRA_BESTINTER = 1 << 3, + + // Skips oblique intra modes at angles 27, 63, 117, 153 if the best + // intra so far is not one of the neighboring directions. + FLAG_SKIP_INTRA_DIRMISMATCH = 1 << 4, + + // Skips intra modes other than DC_PRED if the source variance is small + FLAG_SKIP_INTRA_LOWVAR = 1 << 5, +} MODE_SEARCH_SKIP_LOGIC; + +typedef enum { + // Search partitions using RD/NONRD criterion + SEARCH_PARTITION = 0, + + // Always use a fixed size partition + FIXED_PARTITION = 1, + + // Use a fixed size partition in every 64X64 SB, where the size is + // determined based on source variance + VAR_BASED_FIXED_PARTITION = 2, + + REFERENCE_PARTITION = 3, + + // Use an arbitrary partitioning scheme based on source variance within + // a 64X64 SB + VAR_BASED_PARTITION, + + // Use non-fixed partitions based on source variance + SOURCE_VAR_BASED_PARTITION +} PARTITION_SEARCH_TYPE; + +typedef enum { + // Does a dry run to see if any of the contexts need to be updated or not, + // before the final run. + TWO_LOOP = 0, + + // No dry run conducted. + ONE_LOOP = 1, + + // No dry run, also only half the coef contexts and bands are updated. + // The rest are not updated at all. + ONE_LOOP_REDUCED = 2 +} FAST_COEFF_UPDATE; + +typedef struct { + // Frame level coding parameter update + int frame_parameter_update; + + // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). + SEARCH_METHODS search_method; + + RECODE_LOOP_TYPE recode_loop; + + // Subpel_search_method can only be subpel_tree which does a subpixel + // logarithmic search that keeps stepping at 1/2 pixel units until + // you stop getting a gain, and then goes on to 1/4 and repeats + // the same process. Along the way it skips many diagonals. + SUBPEL_SEARCH_METHODS subpel_search_method; + + // Maximum number of steps in logarithmic subpel search before giving up. 
+ int subpel_iters_per_step; + + // Control when to stop subpel search + int subpel_force_stop; + + // This parameter controls the number of steps we'll do in a diamond + // search. + int max_step_search_steps; + + // This parameter controls which step in the n-step process we start at. + // It's changed adaptively based on circumstances. + int reduce_first_step_size; + + // If this is set to 1, we limit the motion search range to 2 times the + // largest motion vector found in the last frame. + int auto_mv_step_size; + + // Trellis (dynamic programming) optimization of quantized values (+1, 0). + int optimize_coefficients; + + // Always set to 0. If on it enables 0 cost background transmission + // (except for the initial transmission of the segmentation). The feature is + // disabled because the addition of very large block sizes makes the + // backgrounds very cheap to encode, and the segmentation we have + // adds overhead. + int static_segmentation; + + // If 1 we iterate finding a best reference for 2 ref frames together - via + // a log search that iterates 4 times (check around mv for last for best + // error of combined predictor then check around mv for alt). If 0 + // we just use the best motion vector found for each frame by itself. + int comp_inter_joint_search_thresh; + + // This variable is used to cap the maximum number of times we skip testing a + // mode to be evaluated. A high value means we will be faster. + int adaptive_rd_thresh; + + // Enables skipping the reconstruction step (idct, recon) in the + // intermediate steps assuming the last frame didn't have too many intra + // blocks and the q is less than a threshold. + int skip_encode_sb; + int skip_encode_frame; + // Speed feature to allow or disallow skipping of recode at block + // level within a frame. + int allow_skip_recode; + + // This variable allows us to reuse the last frame's partition choices + // (64x64 v 32x32 etc) for this frame. 
It can be set to only use the last + // frame as a starting point in low motion scenes or always use it. If set + // we use last_partitioning_redo_frequency to determine how often to redo + // the partitioning from scratch. adjust_partitioning_from_last_frame + // enables us to adjust up or down one partitioning from the last frame's + // partitioning. + LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning; + + // Determine which method we use to determine transform size. We can choose + // between options like full rd, largest for prediction size, largest + // for intra and model coefs for the rest. + TX_SIZE_SEARCH_METHOD tx_size_search_method; + + // Low precision 32x32 fdct keeps everything in 16 bits and thus is less + // precise but significantly faster than the non lp version. + int use_lp32x32fdct; + + // TODO(JBB): remove this as it's no longer used. + + // After looking at the first set of modes (set by index here), skip + // checking modes for reference frames that don't match the reference frame + // of the best so far. + int mode_skip_start; + + // TODO(JBB): Remove this. + int reference_masking; + + PARTITION_SEARCH_TYPE partition_search_type; + + // Used if partition_search_type = FIXED_PARTITION + BLOCK_SIZE always_this_block_size; + + // Skip rectangular partition test when partition type none gives better + // rd than partition type split. + int less_rectangular_check; + + // Disable testing non square partitions. (e.g. 16x32) + int use_square_partition_only; + + // Sets min and max partition sizes for this 64x64 region based on the + // same 64x64 in last encoded frame, and the left and above neighbor. + AUTO_MIN_MAX_MODE auto_min_max_partition_size; + + // Min and max partition size we enable (block_size) as per auto + // min max, but also used by adjust partitioning, and pick_partitioning. 
+ BLOCK_SIZE min_partition_size; + BLOCK_SIZE max_partition_size; + + // Whether or not we allow partitions one smaller or one greater than the last + // frame's partitioning. Only used if use_lastframe_partitioning is set. + int adjust_partitioning_from_last_frame; + + // How frequently we re do the partitioning from scratch. Only used if + // use_lastframe_partitioning is set. + int last_partitioning_redo_frequency; + + // This enables constrained copy partitioning, which, given an input block + // size bsize, will copy previous partition for partitions less than bsize, + // otherwise bsize partition is used. bsize is currently set to 16x16. + // Used for the case where motion is detected in superblock. + int constrain_copy_partition; + + // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable + // it always, to allow it for only Last frame and Intra, disable it for all + // inter modes or to enable it always. + int disable_split_mask; + + // TODO(jingning): combine the related motion search speed features + // This allows us to use motion search at other sizes as a starting + // point for this motion search and limits the search range around it. + int adaptive_motion_search; + + // Allows sub 8x8 modes to use the prediction filter that was determined + // best for 8x8 mode. If set to 0 we always re check all the filters for + // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter + // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected. + int adaptive_pred_interp_filter; + + // Search through variable block partition types in non-RD mode decision + // encoding process for RTC. + int partition_check; + + // Use finer quantizer in every other few frames that run variable block + // partition type search. + int force_frame_boost; + + // Maximally allowed base quantization index fluctuation. 
+ int max_delta_qindex; + + // Implements various heuristics to skip searching modes + // The heuristics selected are based on flags + // defined in the MODE_SEARCH_SKIP_LOGIC enum + unsigned int mode_search_skip_flags; + + // A source variance threshold below which the split mode is disabled + unsigned int disable_split_var_thresh; + + // A source variance threshold below which filter search is disabled + // Choose a very large value (UINT_MAX) to use 8-tap always + unsigned int disable_filter_search_var_thresh; + + // These bit masks allow you to enable or disable intra modes for each + // transform size separately. + int intra_y_mode_mask[TX_SIZES]; + int intra_uv_mode_mask[TX_SIZES]; + + // This variable enables an early break out of mode testing if the model for + // rd built from the prediction signal indicates a value that's much + // higher than the best rd we've seen so far. + int use_rd_breakout; + + // This enables us to use an estimate for intra rd based on dc mode rather + // than choosing an actual uv mode in the stage of encoding before the actual + // final encode. + int use_uv_intra_rd_estimate; + + // This feature controls how the loop filter level is determined. + LPF_PICK_METHOD lpf_pick; + + // This feature limits the number of coefficient updates we actually do + // by only looking at counts from 1/2 the bands. + FAST_COEFF_UPDATE use_fast_coef_updates; + + // This flag controls the use of non-RD mode decision. + int use_nonrd_pick_mode; + + // This variable sets the encode_breakout threshold. Currently, it is only + // enabled in real time mode. + int encode_breakout_thresh; + + // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV + // modes are disabled in order from LSB to MSB for each BLOCK_SIZE. + int disable_inter_mode_mask[BLOCK_SIZES]; + + // This feature controls whether we do the expensive context update and + // calculation in the rd coefficient costing loop. 
+ int use_fast_coef_costing; + + // This feature controls the tolerance vs target used in deciding whether to + // recode a frame. It has no meaning if recode is disabled. + int recode_tolerance; + + // This variable controls the maximum block size where intra blocks can be + // used in inter frames. + // TODO(aconverse): Fold this into one of the other many mode skips + BLOCK_SIZE max_intra_bsize; + + // The frequency that we check if SOURCE_VAR_BASED_PARTITION or + // FIXED_PARTITION search type should be used. + int search_type_check_frequency; + + // The threshold used in SOURCE_VAR_BASED_PARTITION search type. + int source_var_thresh; +} SPEED_FEATURES; + +struct VP9_COMP; + +void vp9_set_speed_features(struct VP9_COMP *cpi); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_SPEED_FEATURES_H_ + diff --git a/libvpx/vp9/encoder/vp9_ssim.c b/libvpx/vp9/encoder/vp9_ssim.c index 1435191..026e6a8 100644 --- a/libvpx/vp9/encoder/vp9_ssim.c +++ b/libvpx/vp9/encoder/vp9_ssim.c @@ -8,8 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "./vp9_rtcd.h" -#include "vp9/encoder/vp9_onyx_int.h" +#include "vp9/encoder/vp9_ssim.h" void vp9_ssim_parms_16x16_c(uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, diff --git a/libvpx/vp9/encoder/vp9_ssim.h b/libvpx/vp9/encoder/vp9_ssim.h new file mode 100644 index 0000000..a581c2c --- /dev/null +++ b/libvpx/vp9/encoder/vp9_ssim.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VP9_ENCODER_VP9_SSIM_H_ +#define VP9_ENCODER_VP9_SSIM_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "vpx_scale/yv12config.h" + +double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, + int lumamask, double *weight); + +double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, + double *ssim_y, double *ssim_u, double *ssim_v); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_SSIM_H_ diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.c b/libvpx/vp9/encoder/vp9_svc_layercontext.c index eba7bc6..c2b6263 100644 --- a/libvpx/vp9/encoder/vp9_svc_layercontext.c +++ b/libvpx/vp9/encoder/vp9_svc_layercontext.c @@ -14,17 +14,26 @@ #include "vp9/encoder/vp9_svc_layercontext.h" void vp9_init_layer_context(VP9_COMP *const cpi) { + SVC *const svc = &cpi->svc; const VP9_CONFIG *const oxcf = &cpi->oxcf; - int temporal_layer = 0; - cpi->svc.spatial_layer_id = 0; - cpi->svc.temporal_layer_id = 0; - for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers; - ++temporal_layer) { - LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; + int layer; + int layer_end; + + svc->spatial_layer_id = 0; + svc->temporal_layer_id = 0; + + if (svc->number_temporal_layers > 1) { + layer_end = svc->number_temporal_layers; + } else { + layer_end = svc->number_spatial_layers; + } + + for (layer = 0; layer < layer_end; ++layer) { + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; RATE_CONTROL *const lrc = &lc->rc; - lrc->avg_frame_qindex[INTER_FRAME] = q_trans[oxcf->worst_allowed_q]; - lrc->last_q[INTER_FRAME] = q_trans[oxcf->worst_allowed_q]; - lrc->ni_av_qi = q_trans[oxcf->worst_allowed_q]; + lc->current_video_frame_in_layer = 0; + lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; + lrc->ni_av_qi = oxcf->worst_allowed_q; lrc->total_actual_bits = 0; lrc->total_target_vs_actual = 0; lrc->ni_tot_qi = 0; @@ -35,11 +44,19 @@ void vp9_init_layer_context(VP9_COMP *const 
cpi) { lrc->decimation_factor = 0; lrc->rate_correction_factor = 1.0; lrc->key_frame_rate_correction_factor = 1.0; - lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * - 1000; - lrc->buffer_level = - vp9_rescale((int)(oxcf->starting_buffer_level), - lc->target_bandwidth, 1000); + + if (svc->number_temporal_layers > 1) { + lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000; + lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; + } else { + lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000; + lrc->last_q[0] = oxcf->best_allowed_q; + lrc->last_q[1] = oxcf->best_allowed_q; + lrc->last_q[2] = oxcf->best_allowed_q; + } + + lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level), + lc->target_bandwidth, 1000); lrc->bits_off_target = lrc->buffer_level; } } @@ -47,16 +64,29 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { // Update the layer context from a change_config() call. void vp9_update_layer_context_change_config(VP9_COMP *const cpi, const int target_bandwidth) { + SVC *const svc = &cpi->svc; const VP9_CONFIG *const oxcf = &cpi->oxcf; const RATE_CONTROL *const rc = &cpi->rc; - int temporal_layer = 0; + int layer; + int layer_end; float bitrate_alloc = 1.0; - for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers; - ++temporal_layer) { - LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; + + if (svc->number_temporal_layers > 1) { + layer_end = svc->number_temporal_layers; + } else { + layer_end = svc->number_spatial_layers; + } + + for (layer = 0; layer < layer_end; ++layer) { + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; RATE_CONTROL *const lrc = &lc->rc; - lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * 1000; - bitrate_alloc = (float)lc->target_bandwidth / (float)target_bandwidth; + + if (svc->number_temporal_layers > 1) { + lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000; + } else { + lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 
1000; + } + bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; // Update buffer-related quantities. lc->starting_buffer_level = (int64_t)(oxcf->starting_buffer_level * bitrate_alloc); @@ -67,7 +97,11 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size); lrc->buffer_level = MIN(lrc->buffer_level, lc->maximum_buffer_size); // Update framerate-related quantities. - lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer]; + if (svc->number_temporal_layers > 1) { + lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer]; + } else { + lc->framerate = oxcf->framerate; + } lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); lrc->max_frame_bandwidth = rc->max_frame_bandwidth; // Update qp-related quantities. @@ -76,34 +110,70 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, } } -void vp9_update_layer_framerate(VP9_COMP *const cpi) { - int temporal_layer = cpi->svc.temporal_layer_id; +static LAYER_CONTEXT *get_layer_context(SVC *svc) { + return svc->number_temporal_layers > 1 ? + &svc->layer_context[svc->temporal_layer_id] : + &svc->layer_context[svc->spatial_layer_id]; +} + +void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { + SVC *const svc = &cpi->svc; const VP9_CONFIG *const oxcf = &cpi->oxcf; - LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; + LAYER_CONTEXT *const lc = get_layer_context(svc); RATE_CONTROL *const lrc = &lc->rc; - lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer]; + const int layer = svc->temporal_layer_id; + + lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer]; lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth; // Update the average layer frame size (non-cumulative per-frame-bw). 
- if (temporal_layer == 0) { + if (layer == 0) { lc->avg_frame_size = lrc->av_per_frame_bandwidth; } else { - double prev_layer_framerate = oxcf->framerate / - oxcf->ts_rate_decimator[temporal_layer - 1]; - int prev_layer_target_bandwidth = - oxcf->ts_target_bitrate[temporal_layer - 1] * 1000; + const double prev_layer_framerate = + oxcf->framerate / oxcf->ts_rate_decimator[layer - 1]; + const int prev_layer_target_bandwidth = + oxcf->ts_target_bitrate[layer - 1] * 1000; lc->avg_frame_size = (int)((lc->target_bandwidth - prev_layer_target_bandwidth) / (lc->framerate - prev_layer_framerate)); } } +void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) { + const VP9_CONFIG *const oxcf = &cpi->oxcf; + LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); + RATE_CONTROL *const lrc = &lc->rc; + + lc->framerate = framerate; + lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); + lrc->min_frame_bandwidth = (int)(lrc->av_per_frame_bandwidth * + oxcf->two_pass_vbrmin_section / 100); + lrc->max_frame_bandwidth = (int)(((int64_t)lrc->av_per_frame_bandwidth * + oxcf->two_pass_vbrmax_section) / 100); + lrc->max_gf_interval = 16; + + lrc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; + + if (oxcf->play_alternate && oxcf->lag_in_frames) { + if (lrc->max_gf_interval > oxcf->lag_in_frames - 1) + lrc->max_gf_interval = oxcf->lag_in_frames - 1; + + if (lrc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) + lrc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; + } + + if (lrc->max_gf_interval > lrc->static_scene_max_gf_interval) + lrc->max_gf_interval = lrc->static_scene_max_gf_interval; +} + void vp9_restore_layer_context(VP9_COMP *const cpi) { - int temporal_layer = cpi->svc.temporal_layer_id; - LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; - int frame_since_key = cpi->rc.frames_since_key; - int frame_to_key = cpi->rc.frames_to_key; + LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); 
+ const int old_frame_since_key = cpi->rc.frames_since_key; + const int old_frame_to_key = cpi->rc.frames_to_key; + cpi->rc = lc->rc; + cpi->twopass = lc->twopass; cpi->oxcf.target_bandwidth = lc->target_bandwidth; cpi->oxcf.starting_buffer_level = lc->starting_buffer_level; cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level; @@ -111,17 +181,44 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { cpi->output_framerate = lc->framerate; // Reset the frames_since_key and frames_to_key counters to their values // before the layer restore. Keep these defined for the stream (not layer). - cpi->rc.frames_since_key = frame_since_key; - cpi->rc.frames_to_key = frame_to_key; + if (cpi->svc.number_temporal_layers > 1) { + cpi->rc.frames_since_key = old_frame_since_key; + cpi->rc.frames_to_key = old_frame_to_key; + } } void vp9_save_layer_context(VP9_COMP *const cpi) { - int temporal_layer = cpi->svc.temporal_layer_id; - LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; + const VP9_CONFIG *const oxcf = &cpi->oxcf; + LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); + lc->rc = cpi->rc; - lc->target_bandwidth = (int)cpi->oxcf.target_bandwidth; - lc->starting_buffer_level = cpi->oxcf.starting_buffer_level; - lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level; - lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size; + lc->twopass = cpi->twopass; + lc->target_bandwidth = (int)oxcf->target_bandwidth; + lc->starting_buffer_level = oxcf->starting_buffer_level; + lc->optimal_buffer_level = oxcf->optimal_buffer_level; + lc->maximum_buffer_size = oxcf->maximum_buffer_size; lc->framerate = cpi->output_framerate; } + +void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) { + SVC *const svc = &cpi->svc; + int i; + + for (i = 0; i < svc->number_spatial_layers; ++i) { + struct twopass_rc *const twopass = &svc->layer_context[i].twopass; + + svc->spatial_layer_id = i; + vp9_init_second_pass(cpi); + + twopass->total_stats.spatial_layer_id = i; + 
twopass->total_left_stats.spatial_layer_id = i; + } + svc->spatial_layer_id = 0; +} + +void vp9_inc_frame_in_layer(SVC *svc) { + LAYER_CONTEXT *const lc = (svc->number_temporal_layers > 1) + ? &svc->layer_context[svc->temporal_layer_id] + : &svc->layer_context[svc->spatial_layer_id]; + ++lc->current_video_frame_in_layer; +} diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.h b/libvpx/vp9/encoder/vp9_svc_layercontext.h index e81b0b7..2abed30 100644 --- a/libvpx/vp9/encoder/vp9_svc_layercontext.h +++ b/libvpx/vp9/encoder/vp9_svc_layercontext.h @@ -28,6 +28,8 @@ typedef struct { double framerate; int avg_frame_size; struct twopass_rc twopass; + struct vpx_fixed_buf rc_twopass_stats_in; + unsigned int current_video_frame_in_layer; } LAYER_CONTEXT; typedef struct { @@ -35,8 +37,8 @@ typedef struct { int temporal_layer_id; int number_spatial_layers; int number_temporal_layers; - // Layer context used for rate control in temporal CBR mode or spatial - // two pass mode. Defined for temporal or spatial layers for now. + // Layer context used for rate control in one pass temporal CBR mode or + // two pass spatial mode. Defined for temporal or spatial layers for now. // Does not support temporal combined with spatial RC. LAYER_CONTEXT layer_context[MAX(VPX_TS_MAX_LAYERS, VPX_SS_MAX_LAYERS)]; } SVC; @@ -51,8 +53,12 @@ void vp9_update_layer_context_change_config(struct VP9_COMP *const cpi, const int target_bandwidth); // Prior to encoding the frame, update framerate-related quantities -// for the current layer. -void vp9_update_layer_framerate(struct VP9_COMP *const cpi); +// for the current temporal layer. +void vp9_update_temporal_layer_framerate(struct VP9_COMP *const cpi); + +// Update framerate-related quantities for the current spatial layer. +void vp9_update_spatial_layer_framerate(struct VP9_COMP *const cpi, + double framerate); // Prior to encoding the frame, set the layer context, for the current layer // to be encoded, to the cpi struct. 
@@ -61,6 +67,12 @@ void vp9_restore_layer_context(struct VP9_COMP *const cpi); // Save the layer context after encoding the frame. void vp9_save_layer_context(struct VP9_COMP *const cpi); +// Initialize second pass rc for spatial svc. +void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi); + +// Increment number of video frames in layer +void vp9_inc_frame_in_layer(SVC *svc); + #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vp9/encoder/vp9_temporal_filter.c b/libvpx/vp9/encoder/vp9_temporal_filter.c index 6233116..0410273 100644 --- a/libvpx/vp9/encoder/vp9_temporal_filter.c +++ b/libvpx/vp9/encoder/vp9_temporal_filter.c @@ -41,7 +41,10 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, struct scale_factors *scale, int x, int y) { const int which_mv = 0; - MV mv = { mv_row, mv_col }; + const MV mv = { mv_row, mv_col }; + const InterpKernel *const kernel = + vp9_get_interp_kernel(xd->mi[0]->mbmi.interp_filter); + enum mv_precision mv_precision_uv; int uv_stride; if (uv_block_size == 8) { @@ -58,7 +61,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, 16, 16, which_mv, - xd->interp_kernel, MV_PRECISION_Q3, x, y); + kernel, MV_PRECISION_Q3, x, y); vp9_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_size, @@ -66,7 +69,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, uv_block_size, uv_block_size, which_mv, - xd->interp_kernel, mv_precision_uv, x, y); + kernel, mv_precision_uv, x, y); vp9_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_size, @@ -74,7 +77,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, uv_block_size, uv_block_size, which_mv, - xd->interp_kernel, mv_precision_uv, x, y); + kernel, mv_precision_uv, x, y); } void vp9_temporal_filter_apply_c(uint8_t *frame1, @@ -133,7 +136,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, MV best_ref_mv1 = {0, 0}; MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ - 
MV *ref_mv = &x->e_mbd.mi_8x8[0]->bmi[0].as_mv[0].as_mv; + MV *ref_mv = &x->e_mbd.mi[0]->bmi[0].as_mv[0].as_mv; // Save input state struct buf_2d src = x->plane[0].src; @@ -250,8 +253,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, if (cpi->frames[frame] == NULL) continue; - mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.row = 0; - mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.col = 0; + mbd->mi[0]->bmi[0].as_mv[0].as_mv.row = 0; + mbd->mi[0]->bmi[0].as_mv[0].as_mv.col = 0; if (frame == alt_ref_index) { filter_weight = 2; @@ -284,8 +287,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, cpi->frames[frame]->v_buffer + mb_uv_offset, cpi->frames[frame]->y_stride, mb_uv_height, - mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.row, - mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.col, + mbd->mi[0]->bmi[0].as_mv[0].as_mv.row, + mbd->mi[0]->bmi[0].as_mv[0].as_mv.col, predictor, scale, mb_col * 16, mb_row * 16); diff --git a/libvpx/vp9/encoder/vp9_tokenize.c b/libvpx/vp9/encoder/vp9_tokenize.c index bb5f1c2..291ccb3 100644 --- a/libvpx/vp9/encoder/vp9_tokenize.c +++ b/libvpx/vp9/encoder/vp9_tokenize.c @@ -108,7 +108,7 @@ void vp9_coef_tree_initialize() { vp9_tokens_from_tree(vp9_coef_encodings, vp9_coef_tree); } -static void fill_value_tokens() { +void vp9_tokenize_initialize() { TOKENVALUE *const t = dct_value_tokens + DCT_MAX_VALUE; const vp9_extra_bit *const e = vp9_extra_bits; @@ -162,7 +162,6 @@ struct tokenize_b_args { VP9_COMP *cpi; MACROBLOCKD *xd; TOKENEXTRA **tp; - uint8_t *token_cache; }; static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize, @@ -213,10 +212,10 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, VP9_COMP *cpi = args->cpi; MACROBLOCKD *xd = args->xd; TOKENEXTRA **tp = args->tp; - uint8_t *token_cache = args->token_cache; + uint8_t token_cache[32 * 32]; struct macroblock_plane *p = &cpi->mb.plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = 
&xd->mi[0]->mbmi; int pt; /* near block/prev token context index */ int c; TOKENEXTRA *t = *tp; /* store tokens starting here */ @@ -310,12 +309,12 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; TOKENEXTRA *t_backup = *t; const int ctx = vp9_get_skip_context(xd); const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP); - struct tokenize_b_args arg = {cpi, xd, t, cpi->mb.token_cache}; + struct tokenize_b_args arg = {cpi, xd, t}; if (mbmi->skip) { if (!dry_run) cm->counts.skip[ctx][1] += skip_inc; @@ -333,7 +332,3 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, *t = t_backup; } } - -void vp9_tokenize_initialize() { - fill_value_tokens(); -} diff --git a/libvpx/vp9/encoder/vp9_variance.c b/libvpx/vp9/encoder/vp9_variance.c index 8bc3850..71867a9 100644 --- a/libvpx/vp9/encoder/vp9_variance.c +++ b/libvpx/vp9/encoder/vp9_variance.c @@ -216,7 +216,7 @@ unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 64, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter); - comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64); + vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64); return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse); } @@ -273,7 +273,7 @@ unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 32, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter); - comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32); + vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32); return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, 
sse); } @@ -330,7 +330,7 @@ unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 32, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); - comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32); + vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32); return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse); } @@ -387,7 +387,7 @@ unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 16, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter); - comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16); + vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16); return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse); } @@ -417,6 +417,12 @@ unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, return (var - (((int64_t)avg * avg) >> 10)); } +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { + variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum); +} + unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -614,7 +620,7 @@ unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, // Now filter Verticaly var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); - comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); + vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse); } @@ -658,7 +664,7 @@ unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); - 
comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); + vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -703,7 +709,7 @@ unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, 1, 17, 16, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); - comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16); + vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16); return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse); } @@ -747,7 +753,7 @@ unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 64, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); - comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64); + vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64); return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse); } @@ -791,7 +797,7 @@ unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 32, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); - comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32); + vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32); return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse); } @@ -955,7 +961,7 @@ unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 16, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter); - comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16); + vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16); return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse); } @@ -999,7 +1005,7 @@ unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, 
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); - comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8); + vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8); return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -1043,7 +1049,7 @@ unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); - comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8); + vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8); return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -1089,6 +1095,23 @@ unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 4, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); - comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4); + vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4); return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse); } + + +void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, + int height, const uint8_t *ref, int ref_stride) { + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + int tmp; + tmp = pred[j] + ref[j]; + comp_pred[j] = (tmp + 1) >> 1; + } + comp_pred += width; + pred += width; + ref += ref_stride; + } +} diff --git a/libvpx/vp9/encoder/vp9_variance.h b/libvpx/vp9/encoder/vp9_variance.h index 3bc2091..62e20dc 100644 --- a/libvpx/vp9/encoder/vp9_variance.h +++ b/libvpx/vp9/encoder/vp9_variance.h @@ -100,21 +100,9 @@ typedef struct vp9_variance_vtable { vp9_sad_multi_d_fn_t sdx4df; } vp9_variance_fn_ptr_t; -static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, - int height, const uint8_t *ref, int 
ref_stride) { - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - int tmp; - tmp = pred[j] + ref[j]; - comp_pred[j] = (tmp + 1) >> 1; - } - comp_pred += width; - pred += width; - ref += ref_stride; - } -} +void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, + int height, const uint8_t *ref, int ref_stride); + #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vp9/encoder/vp9_write_bit_buffer.c b/libvpx/vp9/encoder/vp9_write_bit_buffer.c new file mode 100644 index 0000000..962d0ca --- /dev/null +++ b/libvpx/vp9/encoder/vp9_write_bit_buffer.c @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "vp9/encoder/vp9_write_bit_buffer.h" + +size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb) { + return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0); +} + +void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) { + const int off = (int)wb->bit_offset; + const int p = off / CHAR_BIT; + const int q = CHAR_BIT - 1 - off % CHAR_BIT; + if (q == CHAR_BIT -1) { + wb->bit_buffer[p] = bit << q; + } else { + wb->bit_buffer[p] &= ~(1 << q); + wb->bit_buffer[p] |= bit << q; + } + wb->bit_offset = off + 1; +} + +void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, int data, int bits) { + int bit; + for (bit = bits - 1; bit >= 0; bit--) + vp9_wb_write_bit(wb, (data >> bit) & 1); +} diff --git a/libvpx/vp9/encoder/vp9_write_bit_buffer.h b/libvpx/vp9/encoder/vp9_write_bit_buffer.h index 1795e05..073608d 100644 --- a/libvpx/vp9/encoder/vp9_write_bit_buffer.h +++ b/libvpx/vp9/encoder/vp9_write_bit_buffer.h @@ -24,29 +24,11 @@ struct vp9_write_bit_buffer { size_t bit_offset; }; -static size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb) { - return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0); -} - -static void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) { - const int off = (int)wb->bit_offset; - const int p = off / CHAR_BIT; - const int q = CHAR_BIT - 1 - off % CHAR_BIT; - if (q == CHAR_BIT -1) { - wb->bit_buffer[p] = bit << q; - } else { - wb->bit_buffer[p] &= ~(1 << q); - wb->bit_buffer[p] |= bit << q; - } - wb->bit_offset = off + 1; -} - -static void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, - int data, int bits) { - int bit; - for (bit = bits - 1; bit >= 0; bit--) - vp9_wb_write_bit(wb, (data >> bit) & 1); -} +size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb); + +void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit); + +void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, int data, int bits); #ifdef __cplusplus diff --git 
a/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c b/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c new file mode 100644 index 0000000..f31b176 --- /dev/null +++ b/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include <immintrin.h> // AVX2 +#include "vpx/vpx_integer.h" + +void vp9_sad32x32x4d_avx2(uint8_t *src, + int src_stride, + uint8_t *ref[4], + int ref_stride, + unsigned int res[4]) { + __m256i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg; + __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3; + __m256i sum_mlow, sum_mhigh; + int i; + uint8_t *ref0, *ref1, *ref2, *ref3; + + ref0 = ref[0]; + ref1 = ref[1]; + ref2 = ref[2]; + ref3 = ref[3]; + sum_ref0 = _mm256_set1_epi16(0); + sum_ref1 = _mm256_set1_epi16(0); + sum_ref2 = _mm256_set1_epi16(0); + sum_ref3 = _mm256_set1_epi16(0); + for (i = 0; i < 32 ; i++) { + // load src and all refs + src_reg = _mm256_load_si256((__m256i *)(src)); + ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); + ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); + ref2_reg = _mm256_loadu_si256((__m256i *) (ref2)); + ref3_reg = _mm256_loadu_si256((__m256i *) (ref3)); + // sum of the absolute differences between every ref-i to src + ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg); + ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg); + ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg); + ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg); + // sum every ref-i + sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg); + sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg); + sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg); + sum_ref3 = 
_mm256_add_epi32(sum_ref3, ref3_reg); + + src+= src_stride; + ref0+= ref_stride; + ref1+= ref_stride; + ref2+= ref_stride; + ref3+= ref_stride; + } + { + __m128i sum; + // in sum_ref-i the result is saved in the first 4 bytes + // the other 4 bytes are zeroed. + // sum_ref1 and sum_ref3 are shifted left by 4 bytes + sum_ref1 = _mm256_slli_si256(sum_ref1, 4); + sum_ref3 = _mm256_slli_si256(sum_ref3, 4); + + // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3 + sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1); + sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3); + + // merge every 64 bit from each sum_ref-i + sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2); + sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2); + + // add the low 64 bit to the high 64 bit + sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh); + + // add the low 128 bit to the high 128 bit + sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow), + _mm256_extractf128_si256(sum_mlow, 1)); + + _mm_storeu_si128((__m128i *)(res), sum); + } +} + +void vp9_sad64x64x4d_avx2(uint8_t *src, + int src_stride, + uint8_t *ref[4], + int ref_stride, + unsigned int res[4]) { + __m256i src_reg, srcnext_reg, ref0_reg, ref0next_reg; + __m256i ref1_reg, ref1next_reg, ref2_reg, ref2next_reg; + __m256i ref3_reg, ref3next_reg; + __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3; + __m256i sum_mlow, sum_mhigh; + int i; + uint8_t *ref0, *ref1, *ref2, *ref3; + + ref0 = ref[0]; + ref1 = ref[1]; + ref2 = ref[2]; + ref3 = ref[3]; + sum_ref0 = _mm256_set1_epi16(0); + sum_ref1 = _mm256_set1_epi16(0); + sum_ref2 = _mm256_set1_epi16(0); + sum_ref3 = _mm256_set1_epi16(0); + for (i = 0; i < 64 ; i++) { + // load 64 bytes from src and all refs + src_reg = _mm256_load_si256((__m256i *)(src)); + srcnext_reg = _mm256_load_si256((__m256i *)(src + 32)); + ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); + ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32)); + ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); + ref1next_reg = 
_mm256_loadu_si256((__m256i *) (ref1 + 32)); + ref2_reg = _mm256_loadu_si256((__m256i *) (ref2)); + ref2next_reg = _mm256_loadu_si256((__m256i *) (ref2 + 32)); + ref3_reg = _mm256_loadu_si256((__m256i *) (ref3)); + ref3next_reg = _mm256_loadu_si256((__m256i *) (ref3 + 32)); + // sum of the absolute differences between every ref-i to src + ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg); + ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg); + ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg); + ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg); + ref0next_reg = _mm256_sad_epu8(ref0next_reg, srcnext_reg); + ref1next_reg = _mm256_sad_epu8(ref1next_reg, srcnext_reg); + ref2next_reg = _mm256_sad_epu8(ref2next_reg, srcnext_reg); + ref3next_reg = _mm256_sad_epu8(ref3next_reg, srcnext_reg); + + // sum every ref-i + sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg); + sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg); + sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg); + sum_ref3 = _mm256_add_epi32(sum_ref3, ref3_reg); + sum_ref0 = _mm256_add_epi32(sum_ref0, ref0next_reg); + sum_ref1 = _mm256_add_epi32(sum_ref1, ref1next_reg); + sum_ref2 = _mm256_add_epi32(sum_ref2, ref2next_reg); + sum_ref3 = _mm256_add_epi32(sum_ref3, ref3next_reg); + src+= src_stride; + ref0+= ref_stride; + ref1+= ref_stride; + ref2+= ref_stride; + ref3+= ref_stride; + } + { + __m128i sum; + + // in sum_ref-i the result is saved in the first 4 bytes + // the other 4 bytes are zeroed. 
+ // sum_ref1 and sum_ref3 are shifted left by 4 bytes + sum_ref1 = _mm256_slli_si256(sum_ref1, 4); + sum_ref3 = _mm256_slli_si256(sum_ref3, 4); + + // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3 + sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1); + sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3); + + // merge every 64 bit from each sum_ref-i + sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2); + sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2); + + // add the low 64 bit to the high 64 bit + sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh); + + // add the low 128 bit to the high 128 bit + sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow), + _mm256_extractf128_si256(sum_mlow, 1)); + + _mm_storeu_si128((__m128i *)(res), sum); + } +} |