diff options
Diffstat (limited to 'libvpx/vp9/common/vp9_loopfilter.c')
-rw-r--r-- | libvpx/vp9/common/vp9_loopfilter.c | 503 |
1 files changed, 276 insertions, 227 deletions
diff --git a/libvpx/vp9/common/vp9_loopfilter.c b/libvpx/vp9/common/vp9_loopfilter.c index ff504a1..af8afed 100644 --- a/libvpx/vp9/common/vp9_loopfilter.c +++ b/libvpx/vp9/common/vp9_loopfilter.c @@ -16,26 +16,6 @@ #include "vp9/common/vp9_seg_common.h" -// This structure holds bit masks for all 8x8 blocks in a 64x64 region. -// Each 1 bit represents a position in which we want to apply the loop filter. -// Left_ entries refer to whether we apply a filter on the border to the -// left of the block. Above_ entries refer to whether or not to apply a -// filter on the above border. Int_ entries refer to whether or not to -// apply borders on the 4x4 edges within the 8x8 block that each bit -// represents. -// Since each transform is accompanied by a potentially different type of -// loop filter there is a different entry in the array for each transform size. -typedef struct { - uint64_t left_y[TX_SIZES]; - uint64_t above_y[TX_SIZES]; - uint64_t int_4x4_y; - uint16_t left_uv[TX_SIZES]; - uint16_t above_uv[TX_SIZES]; - uint16_t int_4x4_uv; - uint8_t lfl_y[64]; - uint8_t lfl_uv[16]; -} LOOP_FILTER_MASK; - // 64 bit masks for left transform size. Each 1 represents a position where // we should apply a loop filter across the left border of an 8x8 block // boundary. @@ -221,23 +201,10 @@ static const uint16_t size_mask_uv[BLOCK_SIZES] = { static const uint16_t left_border_uv = 0x1111; static const uint16_t above_border_uv = 0x000f; - -static void lf_init_lut(loop_filter_info_n *lfi) { - lfi->mode_lf_lut[DC_PRED] = 0; - lfi->mode_lf_lut[D45_PRED] = 0; - lfi->mode_lf_lut[D135_PRED] = 0; - lfi->mode_lf_lut[D117_PRED] = 0; - lfi->mode_lf_lut[D153_PRED] = 0; - lfi->mode_lf_lut[D207_PRED] = 0; - lfi->mode_lf_lut[D63_PRED] = 0; - lfi->mode_lf_lut[V_PRED] = 0; - lfi->mode_lf_lut[H_PRED] = 0; - lfi->mode_lf_lut[TM_PRED] = 0; - lfi->mode_lf_lut[ZEROMV] = 0; - lfi->mode_lf_lut[NEARESTMV] = 1; - lfi->mode_lf_lut[NEARMV] = 1; - lfi->mode_lf_lut[NEWMV] = 1; -} +static const int mode_lf_lut[MB_MODE_COUNT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES + 1, 1, 0, 1 // INTER_MODES (ZEROMV == 0) +}; static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) { int lvl; @@ -270,9 +237,6 @@ void vp9_loop_filter_init(VP9_COMMON *cm) { update_sharpness(lfi, lf->sharpness_level); lf->last_sharpness_level = lf->sharpness_level; - // init LUT for lvl and hev thr picking - lf_init_lut(lfi); - // init hev threshold const vectors for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) vpx_memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH); @@ -283,10 +247,10 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) { // n_shift is the a multiplier for lf_deltas // the multiplier is 1 for when filter_lvl is between 0 and 31; // 2 when filter_lvl is between 32 and 63 - const int n_shift = default_filt_lvl >> 5; + const int scale = 1 << (default_filt_lvl >> 5); loop_filter_info_n *const lfi = &cm->lf_info; struct loopfilter *const lf = &cm->lf; - struct segmentation *const seg = &cm->seg; + const struct segmentation *const seg = &cm->seg; // update limits if sharpness has changed if (lf->last_sharpness_level != lf->sharpness_level) { @@ -295,86 +259,130 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) { } for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) { - int lvl_seg = default_filt_lvl, ref, mode, intra_lvl; - - // Set the baseline filter values for each segment + int lvl_seg = default_filt_lvl; if (vp9_segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) { const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF); - lvl_seg = seg->abs_delta == SEGMENT_ABSDATA - ? data - : clamp(default_filt_lvl + data, 0, MAX_LOOP_FILTER); + lvl_seg = clamp(seg->abs_delta == SEGMENT_ABSDATA ? + data : default_filt_lvl + data, + 0, MAX_LOOP_FILTER); } if (!lf->mode_ref_delta_enabled) { // we could get rid of this if we assume that deltas are set to // zero when not in use; encoder always uses deltas vpx_memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id])); - continue; - } - - intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * (1 << n_shift); - lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER); - - for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) - for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { - const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * (1 << n_shift) - + lf->mode_deltas[mode] * (1 << n_shift); - lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); + } else { + int ref, mode; + const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale; + lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER); + + for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) { + for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { + const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale + + lf->mode_deltas[mode] * scale; + lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); + } } + } } } -static uint8_t build_lfi(const loop_filter_info_n *lfi_n, - const MB_MODE_INFO *mbmi) { - const int seg = mbmi->segment_id; - const int ref = mbmi->ref_frame[0]; - const int mode = lfi_n->mode_lf_lut[mbmi->mode]; - const int filter_level = lfi_n->lvl[seg][ref][mode]; - - return filter_level; -} - -static void filter_selectively_vert(uint8_t *s, int pitch, - unsigned int mask_16x16, - unsigned int mask_8x8, - unsigned int mask_4x4, - unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, - const uint8_t *lfl) { +static void filter_selectively_vert_row2(PLANE_TYPE plane_type, + uint8_t *s, int pitch, + unsigned int mask_16x16_l, + unsigned int mask_8x8_l, + unsigned int mask_4x4_l, + unsigned int mask_4x4_int_l, + const loop_filter_info_n *lfi_n, + const uint8_t *lfl) { + const int mask_shift = plane_type ? 4 : 8; + const int mask_cutoff = plane_type ? 0xf : 0xff; + const int lfl_forward = plane_type ? 4 : 8; + + unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; + unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; + unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff; + unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff; + unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff; + unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff; + unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff; + unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff; unsigned int mask; - for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; - mask; mask >>= 1) { - const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; + for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 | + mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1; + mask; mask >>= 1) { + const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl; + const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward); + // TODO(yunqingwang): count in loopfilter functions should be removed. if (mask & 1) { - if (mask_16x16 & 1) { - vp9_mb_lpf_vertical_edge_w(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - assert(!(mask_8x8 & 1)); - assert(!(mask_4x4 & 1)); - assert(!(mask_4x4_int & 1)); - } else if (mask_8x8 & 1) { - vp9_mbloop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); - assert(!(mask_16x16 & 1)); - assert(!(mask_4x4 & 1)); - } else if (mask_4x4 & 1) { - vp9_loop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); - assert(!(mask_16x16 & 1)); - assert(!(mask_8x8 & 1)); + if ((mask_16x16_0 | mask_16x16_1) & 1) { + if ((mask_16x16_0 & mask_16x16_1) & 1) { + vp9_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim, + lfi0->hev_thr); + } else if (mask_16x16_0 & 1) { + vp9_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, + lfi0->hev_thr); + } else { + vp9_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim, + lfi1->lim, lfi1->hev_thr); + } + } + + if ((mask_8x8_0 | mask_8x8_1) & 1) { + if ((mask_8x8_0 & mask_8x8_1) & 1) { + vp9_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim, + lfi0->hev_thr, lfi1->mblim, lfi1->lim, + lfi1->hev_thr); + } else if (mask_8x8_0 & 1) { + vp9_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, + 1); + } else { + vp9_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, + lfi1->hev_thr, 1); + } + } + + if ((mask_4x4_0 | mask_4x4_1) & 1) { + if ((mask_4x4_0 & mask_4x4_1) & 1) { + vp9_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim, + lfi0->hev_thr, lfi1->mblim, lfi1->lim, + lfi1->hev_thr); + } else if (mask_4x4_0 & 1) { + vp9_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, + 1); + } else { + vp9_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, + lfi1->hev_thr, 1); + } + } + + if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) { + if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) { + vp9_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim, + lfi0->hev_thr, lfi1->mblim, lfi1->lim, + lfi1->hev_thr); + } else if (mask_4x4_int_0 & 1) { + vp9_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, + lfi0->hev_thr, 1); + } else { + vp9_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim, + lfi1->hev_thr, 1); + } } } - if (mask_4x4_int & 1) - vp9_loop_filter_vertical_edge(s + 4, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + s += 8; lfl += 1; - mask_16x16 >>= 1; - mask_8x8 >>= 1; - mask_4x4 >>= 1; - mask_4x4_int >>= 1; + mask_16x16_0 >>= 1; + mask_8x8_0 >>= 1; + mask_4x4_0 >>= 1; + mask_4x4_int_0 >>= 1; + mask_16x16_1 >>= 1; + mask_8x8_1 >>= 1; + mask_4x4_1 >>= 1; + mask_4x4_int_1 >>= 1; } } @@ -396,95 +404,73 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, if (mask & 1) { if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { - vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 2); + vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 2); count = 2; } else { - vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 1); } - assert(!(mask_8x8 & 1)); - assert(!(mask_4x4 & 1)); - assert(!(mask_4x4_int & 1)); } else if (mask_8x8 & 1) { if ((mask_8x8 & 3) == 3) { // Next block's thresholds const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); - // TODO(yunqingwang): Combine next 2 calls as 1 wide filtering. - vp9_mbloop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); - vp9_mbloop_filter_horizontal_edge(s + 8, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1); + vp9_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, lfin->mblim, lfin->lim, + lfin->hev_thr); if ((mask_4x4_int & 3) == 3) { - // TODO(yunqingwang): Combine next 2 calls as 1 wide filtering. - vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1); - vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch, - lfin->mblim, lfin->lim, - lfin->hev_thr, 1); + vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, lfin->mblim, + lfin->lim, lfin->hev_thr); } else { if (mask_4x4_int & 1) - vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1); + vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 1); else if (mask_4x4_int & 2) - vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch, - lfin->mblim, lfin->lim, - lfin->hev_thr, 1); + vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + lfin->lim, lfin->hev_thr, 1); } count = 2; } else { - vp9_mbloop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + vp9_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); if (mask_4x4_int & 1) - vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1); + vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 1); } - assert(!(mask_16x16 & 1)); - assert(!(mask_4x4 & 1)); } else if (mask_4x4 & 1) { if ((mask_4x4 & 3) == 3) { // Next block's thresholds const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); - // TODO(yunqingwang): Combine next 2 calls as 1 wide filtering. - vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); - vp9_loop_filter_horizontal_edge(s + 8, pitch, lfin->mblim, lfin->lim, - lfin->hev_thr, 1); - + vp9_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, lfin->mblim, lfin->lim, + lfin->hev_thr); if ((mask_4x4_int & 3) == 3) { - // TODO(yunqingwang): Combine next 2 calls as 1 wide filtering. - vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1); - vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch, - lfin->mblim, lfin->lim, - lfin->hev_thr, 1); + vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, lfin->mblim, + lfin->lim, lfin->hev_thr); } else { if (mask_4x4_int & 1) - vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1); + vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 1); else if (mask_4x4_int & 2) - vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch, - lfin->mblim, lfin->lim, - lfin->hev_thr, 1); + vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + lfin->lim, lfin->hev_thr, 1); } count = 2; } else { - vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + vp9_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); - if (mask_4x4_int & 1) - vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1); + if (mask_4x4_int & 1) + vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 1); } - assert(!(mask_16x16 & 1)); - assert(!(mask_8x8 & 1)); } else if (mask_4x4_int & 1) { - vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1); + vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 1); } } s += 8 * count; @@ -510,11 +496,10 @@ static void build_masks(const loop_filter_info_n *const lfi_n, const BLOCK_SIZE block_size = mi->mbmi.sb_type; const TX_SIZE tx_size_y = mi->mbmi.tx_size; const TX_SIZE tx_size_uv = get_uv_tx_size(&mi->mbmi); - const int skip = mi->mbmi.skip_coeff; + const int skip = mi->mbmi.skip; const int seg = mi->mbmi.segment_id; const int ref = mi->mbmi.ref_frame[0]; - const int mode = lfi_n->mode_lf_lut[mi->mbmi.mode]; - const int filter_level = lfi_n->lvl[seg][ref][mode]; + const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]]; uint64_t *left_y = &lfm->left_y[tx_size_y]; uint64_t *above_y = &lfm->above_y[tx_size_y]; uint64_t *int_4x4_y = &lfm->int_4x4_y; @@ -592,11 +577,10 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n, LOOP_FILTER_MASK *lfm) { const BLOCK_SIZE block_size = mi->mbmi.sb_type; const TX_SIZE tx_size_y = mi->mbmi.tx_size; - const int skip = mi->mbmi.skip_coeff; + const int skip = mi->mbmi.skip; const int seg = mi->mbmi.segment_id; const int ref = mi->mbmi.ref_frame[0]; - const int mode = lfi_n->mode_lf_lut[mi->mbmi.mode]; - const int filter_level = lfi_n->lvl[seg][ref][mode]; + const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]]; uint64_t *left_y = &lfm->left_y[tx_size_y]; uint64_t *above_y = &lfm->above_y[tx_size_y]; uint64_t *int_4x4_y = &lfm->int_4x4_y; @@ -634,9 +618,9 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n, // This function sets up the bit masks for the entire 64x64 region represented // by mi_row, mi_col. // TODO(JBB): This function only works for yv12. -static void setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, - MODE_INFO **mi_8x8, const int mode_info_stride, - LOOP_FILTER_MASK *lfm) { +void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, + MODE_INFO **mi_8x8, const int mode_info_stride, + LOOP_FILTER_MASK *lfm) { int idx_32, idx_16, idx_8; const loop_filter_info_n *const lfi_n = &cm->lf_info; MODE_INFO **mip = mi_8x8; @@ -864,9 +848,66 @@ static void setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, lfm->left_uv[i] &= 0xeeee; } } + + // Assert if we try to apply 2 different loop filters at the same position. + assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8])); + assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4])); + assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4])); + assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16])); + assert(!(lfm->left_uv[TX_16X16]&lfm->left_uv[TX_8X8])); + assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4])); + assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4])); + assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16])); + assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8])); + assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4])); + assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4])); + assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16])); + assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8])); + assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4])); + assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4])); + assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16])); +} + +static uint8_t build_lfi(const loop_filter_info_n *lfi_n, + const MB_MODE_INFO *mbmi) { + const int seg = mbmi->segment_id; + const int ref = mbmi->ref_frame[0]; + return lfi_n->lvl[seg][ref][mode_lf_lut[mbmi->mode]]; +} + +static void filter_selectively_vert(uint8_t *s, int pitch, + unsigned int mask_16x16, + unsigned int mask_8x8, + unsigned int mask_4x4, + unsigned int mask_4x4_int, + const loop_filter_info_n *lfi_n, + const uint8_t *lfl) { + unsigned int mask; + + for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; + mask; mask >>= 1) { + const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; + + if (mask & 1) { + if (mask_16x16 & 1) { + vp9_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + } else if (mask_8x8 & 1) { + vp9_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + } else if (mask_4x4 & 1) { + vp9_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + } + } + if (mask_4x4_int & 1) + vp9_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + s += 8; + lfl += 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; + } } -#if CONFIG_NON420 static void filter_block_plane_non420(VP9_COMMON *cm, struct macroblockd_plane *plane, MODE_INFO **mi_8x8, @@ -894,15 +935,15 @@ static void filter_block_plane_non420(VP9_COMMON *cm, // Determine the vertical edges that need filtering for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { const MODE_INFO *mi = mi_8x8[c]; - const int skip_this = mi[0].mbmi.skip_coeff - && is_inter_block(&mi[0].mbmi); + const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type; + const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi); // left edge of current unit is block/partition edge -> no skip - const int block_edge_left = b_width_log2(mi[0].mbmi.sb_type) ? - !(c & ((1 << (b_width_log2(mi[0].mbmi.sb_type)-1)) - 1)) : 1; + const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ? + !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1; const int skip_this_c = skip_this && !block_edge_left; // top edge of current unit is block/partition edge -> no skip - const int block_edge_above = b_height_log2(mi[0].mbmi.sb_type) ? - !(r & ((1 << (b_height_log2(mi[0].mbmi.sb_type)-1)) - 1)) : 1; + const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ? + !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1; const int skip_this_r = skip_this && !block_edge_above; const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV) ? get_uv_tx_size(&mi[0].mbmi) @@ -1004,15 +1045,13 @@ static void filter_block_plane_non420(VP9_COMMON *cm, dst->buf += 8 * dst->stride; } } -#endif -static void filter_block_plane(VP9_COMMON *const cm, - struct macroblockd_plane *const plane, - int mi_row, - LOOP_FILTER_MASK *lfm) { +void vp9_filter_block_plane(VP9_COMMON *const cm, + struct macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm) { struct buf_2d *const dst = &plane->dst; uint8_t* const dst0 = dst->buf; - unsigned int mask_4x4_int_row[MI_BLOCK_SIZE] = {0}; int r, c; if (!plane->plane_type) { @@ -1021,23 +1060,27 @@ static void filter_block_plane(VP9_COMMON *const cm, uint64_t mask_4x4 = lfm->left_y[TX_4X4]; uint64_t mask_4x4_int = lfm->int_4x4_y; - // Vertical pass - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) { - mask_4x4_int_row[r] = mask_4x4_int & 0xff; + // Vertical pass: do 2 rows at one time + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { + unsigned int mask_16x16_l = mask_16x16 & 0xffff; + unsigned int mask_8x8_l = mask_8x8 & 0xffff; + unsigned int mask_4x4_l = mask_4x4 & 0xffff; + unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff; // Disable filtering on the leftmost column - filter_selectively_vert(dst->buf, dst->stride, - mask_16x16 & 0xff, - mask_8x8 & 0xff, - mask_4x4 & 0xff, - mask_4x4_int_row[r], - &cm->lf_info, &lfm->lfl_y[r << 3]); - - dst->buf += 8 * dst->stride; - mask_16x16 >>= 8; - mask_8x8 >>= 8; - mask_4x4 >>= 8; - mask_4x4_int >>= 8; + filter_selectively_vert_row2(plane->plane_type, + dst->buf, dst->stride, + mask_16x16_l, + mask_8x8_l, + mask_4x4_l, + mask_4x4_int_l, + &cm->lf_info, &lfm->lfl_y[r << 3]); + + dst->buf += 16 * dst->stride; + mask_16x16 >>= 16; + mask_8x8 >>= 16; + mask_4x4 >>= 16; + mask_4x4_int >>= 16; } // Horizontal pass @@ -1045,6 +1088,7 @@ static void filter_block_plane(VP9_COMMON *const cm, mask_16x16 = lfm->above_y[TX_16X16]; mask_8x8 = lfm->above_y[TX_8X8]; mask_4x4 = lfm->above_y[TX_4X4]; + mask_4x4_int = lfm->int_4x4_y; for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) { unsigned int mask_16x16_r; @@ -1065,13 +1109,14 @@ static void filter_block_plane(VP9_COMMON *const cm, mask_16x16_r, mask_8x8_r, mask_4x4_r, - mask_4x4_int_row[r], + mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3]); dst->buf += 8 * dst->stride; mask_16x16 >>= 8; mask_8x8 >>= 8; mask_4x4 >>= 8; + mask_4x4_int >>= 8; } } else { uint16_t mask_16x16 = lfm->left_uv[TX_16X16]; @@ -1079,27 +1124,37 @@ static void filter_block_plane(VP9_COMMON *const cm, uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; uint16_t mask_4x4_int = lfm->int_4x4_uv; - // Vertical pass - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { + // Vertical pass: do 2 rows at one time + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { if (plane->plane_type == 1) { - for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) + for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; + lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + + (c << 1)]; + } } - mask_4x4_int_row[r] = mask_4x4_int & 0xf; - // Disable filtering on the leftmost column - filter_selectively_vert(dst->buf, dst->stride, - mask_16x16 & 0xf, - mask_8x8 & 0xf, - mask_4x4 & 0xf, - mask_4x4_int_row[r], - &cm->lf_info, &lfm->lfl_uv[r << 1]); - - dst->buf += 8 * dst->stride; - mask_16x16 >>= 4; - mask_8x8 >>= 4; - mask_4x4 >>= 4; - mask_4x4_int >>= 4; + { + unsigned int mask_16x16_l = mask_16x16 & 0xff; + unsigned int mask_8x8_l = mask_8x8 & 0xff; + unsigned int mask_4x4_l = mask_4x4 & 0xff; + unsigned int mask_4x4_int_l = mask_4x4_int & 0xff; + + // Disable filtering on the leftmost column + filter_selectively_vert_row2(plane->plane_type, + dst->buf, dst->stride, + mask_16x16_l, + mask_8x8_l, + mask_4x4_l, + mask_4x4_int_l, + &cm->lf_info, &lfm->lfl_uv[r << 1]); + + dst->buf += 16 * dst->stride; + mask_16x16 >>= 8; + mask_8x8 >>= 8; + mask_4x4 >>= 8; + mask_4x4_int >>= 8; + } } // Horizontal pass @@ -1107,11 +1162,12 @@ static void filter_block_plane(VP9_COMMON *const cm, mask_16x16 = lfm->above_uv[TX_16X16]; mask_8x8 = lfm->above_uv[TX_8X8]; mask_4x4 = lfm->above_uv[TX_4X4]; + mask_4x4_int = lfm->int_4x4_uv; for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1; const unsigned int mask_4x4_int_r = skip_border_4x4_r ? - 0 : (mask_4x4_int_row[r]); + 0 : (mask_4x4_int & 0xf); unsigned int mask_16x16_r; unsigned int mask_8x8_r; unsigned int mask_4x4_r; @@ -1137,6 +1193,7 @@ static void filter_block_plane(VP9_COMMON *const cm, mask_16x16 >>= 4; mask_8x8 >>= 4; mask_4x4 >>= 4; + mask_4x4_int >>= 4; } } } @@ -1147,10 +1204,8 @@ void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer, const int num_planes = y_only ? 1 : MAX_MB_PLANE; int mi_row, mi_col; LOOP_FILTER_MASK lfm; -#if CONFIG_NON420 int use_420 = y_only || (xd->plane[1].subsampling_y == 1 && xd->plane[1].subsampling_x == 1); -#endif for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mode_info_stride; @@ -1158,25 +1213,19 @@ void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer, for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { int plane; - setup_dst_planes(xd, frame_buffer, mi_row, mi_col); + vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col); // TODO(JBB): Make setup_mask work for non 420. -#if CONFIG_NON420 if (use_420) -#endif - setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mode_info_stride, - &lfm); + vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, + cm->mode_info_stride, &lfm); for (plane = 0; plane < num_planes; ++plane) { -#if CONFIG_NON420 if (use_420) -#endif - filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm); -#if CONFIG_NON420 + vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm); else filter_block_plane_non420(cm, &xd->plane[plane], mi_8x8 + mi_col, mi_row, mi_col); -#endif } } } @@ -1184,12 +1233,12 @@ void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer, void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd, int frame_filter_level, - int y_only, int partial) { + int y_only, int partial_frame) { int start_mi_row, end_mi_row, mi_rows_to_filter; if (!frame_filter_level) return; start_mi_row = 0; mi_rows_to_filter = cm->mi_rows; - if (partial && cm->mi_rows > 8) { + if (partial_frame && cm->mi_rows > 8) { start_mi_row = cm->mi_rows >> 1; start_mi_row &= 0xfffffff8; mi_rows_to_filter = MAX(cm->mi_rows / 8, 8); |