summaryrefslogtreecommitdiffstats
path: root/common/ihevc_deblk_edge_filter.c
diff options
context:
space:
mode:
Diffstat (limited to 'common/ihevc_deblk_edge_filter.c')
-rw-r--r--common/ihevc_deblk_edge_filter.c1510
1 files changed, 1510 insertions, 0 deletions
diff --git a/common/ihevc_deblk_edge_filter.c b/common/ihevc_deblk_edge_filter.c
new file mode 100644
index 0000000..8b6e6ea
--- /dev/null
+++ b/common/ihevc_deblk_edge_filter.c
@@ -0,0 +1,1510 @@
+/******************************************************************************
+*
+* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at:
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*
+******************************************************************************/
+/**
+*******************************************************************************
+* @file
+* ihevc_deblk_edge_filter.c
+*
+* @brief
+* Contains function definitions for deblocking filters
+*
+* @author
+* Srinivas T
+*
+* @par List of Functions:
+* - ihevc_deblk_luma_vert()
+* - ihevc_deblk_luma_horz()
+* - ihevc_deblk_chroma_vert()
+* - ihevc_deblk_chroma_horz()
+* - ihevc_hbd_deblk_luma_vert()
+* - ihevc_hbd_deblk_luma_horz()
+* - ihevc_hbd_deblk_chroma_vert()
+* - ihevc_hbd_deblk_chroma_horz()
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include "ihevc_typedefs.h"
+#include "ihevc_macros.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_func_selector.h"
+#include "ihevc_deblk.h"
+#include "ihevc_deblk_tables.h"
+#include "ihevc_debug.h"
+
+
+/**
+*******************************************************************************
+*
+* @brief
+* Decision process and filtering for the luma block vertical edge.
+*
+* @par Description:
+* The decision process for the luma block vertical edge is carried out and
+* an appropriate filter is applied. The boundary filter strength, bs should
+* be greater than 0. The pcm flags and the transquant bypass flags should
+* be taken care of by the calling function.
+*
+* @param[in] pu1_src
+* Pointer to the src sample q(0,0)
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] bs
+* Boundary filter strength of q(0,0)
+*
+* @param[in] quant_param_p
+* quantization parameter of p block
+*
+* @param[in] quant_param_q
+* quantization parameter of p block
+*
+* @param[in] beta_offset_div2
+*
+*
+* @param[in] tc_offset_div2
+*
+*
+* @param[in] filter_flag_p
+* flag whether to filter the p block
+*
+* @param[in] filter_flag_q
+* flag whether to filter the q block
+*
+* @returns
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+void ihevc_deblk_luma_vert(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 bs,
+ WORD32 quant_param_p,
+ WORD32 quant_param_q,
+ WORD32 beta_offset_div2,
+ WORD32 tc_offset_div2,
+ WORD32 filter_flag_p,
+ WORD32 filter_flag_q)
+{
+ WORD32 qp_luma, beta_indx, tc_indx;
+ WORD32 beta, tc;
+ WORD32 dp0, dp3, dq0, dq3, d0, d3, dp, dq, d;
+ WORD32 d_sam0, d_sam3;
+ WORD32 de, dep, deq;
+ WORD32 row;
+ WORD32 tmp_p0, tmp_p1, tmp_p2, tmp_q0, tmp_q1, tmp_q2;
+ WORD32 delta, delta_p, delta_q;
+
+ ASSERT((bs > 0) && (bs <= 3));
+ ASSERT(filter_flag_p || filter_flag_q);
+
+ qp_luma = (quant_param_p + quant_param_q + 1) >> 1;
+ beta_indx = CLIP3(qp_luma + (beta_offset_div2 << 1), 0, 51);
+
+ /* BS based on implementation can take value 3 if it is intra/inter egde */
+ /* based on BS, tc index is calcuated by adding 2 * ( bs - 1) to QP and tc_offset */
+ /* for BS = 1 adding factor is (0*2), BS = 2 or 3 adding factor is (1*2) */
+ /* the above desired functionallity is achieved by doing (2*(bs>>1)) */
+
+ tc_indx = CLIP3(qp_luma + (2 * (bs >> 1)) + (tc_offset_div2 << 1), 0, 53);
+
+ beta = gai4_ihevc_beta_table[beta_indx];
+ tc = gai4_ihevc_tc_table[tc_indx];
+ if(0 == tc)
+ {
+ return;
+ }
+
+ dq0 = ABS(pu1_src[2] - 2 * pu1_src[1] + pu1_src[0]);
+ dq3 = ABS(pu1_src[3 * src_strd + 2] - 2 * pu1_src[3 * src_strd + 1]
+ + pu1_src[3 * src_strd + 0]);
+ dp0 = ABS(pu1_src[-3] - 2 * pu1_src[-2] + pu1_src[-1]);
+ dp3 = ABS(pu1_src[3 * src_strd - 3] - 2 * pu1_src[3 * src_strd - 2]
+ + pu1_src[3 * src_strd - 1]);
+
+ d0 = dp0 + dq0;
+ d3 = dp3 + dq3;
+
+ dp = dp0 + dp3;
+ dq = dq0 + dq3;
+
+ d = d0 + d3;
+
+ de = 0;
+ dep = 0;
+ deq = 0;
+
+ if(d < beta)
+ {
+ d_sam0 = 0;
+ if((2 * d0 < (beta >> 2))
+ && (ABS(pu1_src[3] - pu1_src[0]) + ABS(pu1_src[-1] - pu1_src[-4])
+ < (beta >> 3))
+ && ABS(pu1_src[0] - pu1_src[-1]) < ((5 * tc + 1) >> 1))
+ {
+ d_sam0 = 1;
+ }
+
+ pu1_src += 3 * src_strd;
+ d_sam3 = 0;
+ if((2 * d3 < (beta >> 2))
+ && (ABS(pu1_src[3] - pu1_src[0]) + ABS(pu1_src[-1] - pu1_src[-4])
+ < (beta >> 3))
+ && ABS(pu1_src[0] - pu1_src[-1]) < ((5 * tc + 1) >> 1))
+ {
+ d_sam3 = 1;
+ }
+ pu1_src -= 3 * src_strd;
+
+ de = (d_sam0 == 1 && d_sam3 == 1) ? 2 : 1;
+ dep = (dp < (beta + (beta >> 1)) >> 3) ? 1 : 0;
+ deq = (dq < (beta + (beta >> 1)) >> 3) ? 1 : 0;
+ if(tc <= 1)
+ {
+ dep = 0;
+ deq = 0;
+ }
+ }
+
+ if(de != 0)
+ {
+ for(row = 0; row < 4; row++)
+ {
+ tmp_p0 = pu1_src[-1];
+ tmp_p1 = pu1_src[-2];
+ tmp_p2 = pu1_src[-3];
+
+ tmp_q0 = pu1_src[0];
+ tmp_q1 = pu1_src[1];
+ tmp_q2 = pu1_src[2];
+
+ if(de == 2)
+ {
+ tmp_q0 = CLIP3((pu1_src[2] + 2 * pu1_src[1] +
+ 2 * pu1_src[0] + 2 * pu1_src[-1] +
+ pu1_src[-2] + 4) >> 3,
+ pu1_src[0] - 2 * tc,
+ pu1_src[0] + 2 * tc);
+
+ tmp_q1 = CLIP3((pu1_src[2] + pu1_src[1] + pu1_src[0] +
+ pu1_src[-1] + 2) >> 2,
+ pu1_src[1] - 2 * tc,
+ pu1_src[1] + 2 * tc);
+
+ tmp_q2 = CLIP3((2 * pu1_src[3] + 3 * pu1_src[2] +
+ pu1_src[1] + pu1_src[0] +
+ pu1_src[-1] + 4) >> 3,
+ pu1_src[2] - 2 * tc,
+ pu1_src[2] + 2 * tc);
+
+ tmp_p0 = CLIP3((pu1_src[1] + 2 * pu1_src[0] +
+ 2 * pu1_src[-1] + 2 * pu1_src[-2] +
+ pu1_src[-3] + 4) >> 3,
+ pu1_src[-1] - 2 * tc,
+ pu1_src[-1] + 2 * tc);
+
+ tmp_p1 = CLIP3((pu1_src[0] + pu1_src[-1] +
+ pu1_src[-2] + pu1_src[-3] + 2) >> 2,
+ pu1_src[-2] - 2 * tc,
+ pu1_src[-2] + 2 * tc);
+
+ tmp_p2 = CLIP3((pu1_src[0] + pu1_src[-1] +
+ pu1_src[-2] + 3 * pu1_src[-3] +
+ 2 * pu1_src[-4] + 4) >> 3,
+ pu1_src[-3] - 2 * tc,
+ pu1_src[-3] + 2 * tc);
+ }
+ else
+ {
+ delta = (9 * (pu1_src[0] - pu1_src[-1]) -
+ 3 * (pu1_src[1] - pu1_src[-2]) + 8) >> 4;
+ if(ABS(delta) < 10 * tc)
+ {
+ delta = CLIP3(delta, -tc, tc);
+
+ tmp_p0 = CLIP_U8(pu1_src[-1] + delta);
+ tmp_q0 = CLIP_U8(pu1_src[0] - delta);
+
+ if(dep == 1)
+ {
+ delta_p = CLIP3((((pu1_src[-3] + pu1_src[-1] + 1) >> 1)
+ - pu1_src[-2] + delta) >> 1,
+ -(tc >> 1),
+ (tc >> 1));
+ tmp_p1 = CLIP_U8(pu1_src[-2] + delta_p);
+ }
+
+ if(deq == 1)
+ {
+ delta_q = CLIP3((((pu1_src[2] + pu1_src[0] + 1) >> 1)
+ - pu1_src[1] - delta) >> 1,
+ -(tc >> 1),
+ (tc >> 1));
+ tmp_q1 = CLIP_U8(pu1_src[1] + delta_q);
+ }
+ }
+ }
+
+ if(filter_flag_p != 0)
+ {
+ pu1_src[-3] = tmp_p2;
+ pu1_src[-2] = tmp_p1;
+ pu1_src[-1] = tmp_p0;
+ }
+
+ if(filter_flag_q != 0)
+ {
+ pu1_src[0] = tmp_q0;
+ pu1_src[1] = tmp_q1;
+ pu1_src[2] = tmp_q2;
+ }
+
+ pu1_src += src_strd;
+ }
+ }
+
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+* Decision process and filtering for the luma block vertical edge for high bit depth.
+*
+* @par Description:
+* The decision process for the luma block vertical edge is carried out and
+* an appropriate filter is applied. The boundary filter strength, bs should
+* be greater than 0. The pcm flags and the transquant bypass flags should
+* be taken care of by the calling function.
+*
+* @param[in] pu2_src
+* Pointer to the src sample q(0,0)
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] bs
+* Boundary filter strength of q(0,0)
+*
+* @param[in] quant_param_p
+* quantization parameter of p block
+*
+* @param[in] quant_param_q
+* quantization parameter of p block
+*
+* @param[in] beta_offset_div2
+*
+*
+* @param[in] tc_offset_div2
+*
+*
+* @param[in] filter_flag_p
+* flag whether to filter the p block
+*
+* @param[in] filter_flag_q
+* flag whether to filter the q block
+*
+* @returns
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+void ihevc_hbd_deblk_luma_vert(UWORD16 *pu2_src,
+ WORD32 src_strd,
+ WORD32 bs,
+ WORD32 quant_param_p,
+ WORD32 quant_param_q,
+ WORD32 beta_offset_div2,
+ WORD32 tc_offset_div2,
+ WORD32 filter_flag_p,
+ WORD32 filter_flag_q,
+ UWORD8 bit_depth)
+{
+ WORD32 qp_luma, beta_indx, tc_indx;
+ WORD32 beta, tc;
+ WORD32 dp0, dp3, dq0, dq3, d0, d3, dp, dq, d;
+ WORD32 d_sam0, d_sam3;
+ WORD32 de, dep, deq;
+ WORD32 row;
+ WORD32 tmp_p0, tmp_p1, tmp_p2, tmp_q0, tmp_q1, tmp_q2;
+ WORD32 delta, delta_p, delta_q;
+
+ ASSERT((bs > 0) && (bs <= 3));
+ ASSERT(filter_flag_p || filter_flag_q);
+
+ qp_luma = (quant_param_p + quant_param_q + 1) >> 1;
+ beta_indx = CLIP3(qp_luma + (beta_offset_div2 << 1), 0, 51);
+
+ /* BS based on implementation can take value 3 if it is intra/inter egde */
+ /* based on BS, tc index is calcuated by adding 2 * ( bs - 1) to QP and tc_offset */
+ /* for BS = 1 adding factor is (0*2), BS = 2 or 3 adding factor is (1*2) */
+ /* the above desired functionallity is achieved by doing (2*(bs>>1)) */
+
+ tc_indx = CLIP3(qp_luma + (2 * (bs >> 1)) + (tc_offset_div2 << 1), 0, 53);
+
+ beta = gai4_ihevc_beta_table[beta_indx] * (1 << (bit_depth - 8));
+ tc = gai4_ihevc_tc_table[tc_indx] * (1 << (bit_depth - 8));
+ if(0 == tc)
+ {
+ return;
+ }
+
+ dq0 = ABS(pu2_src[2] - 2 * pu2_src[1] + pu2_src[0]);
+ dq3 = ABS(pu2_src[3 * src_strd + 2] - 2 * pu2_src[3 * src_strd + 1]
+ + pu2_src[3 * src_strd + 0]);
+ dp0 = ABS(pu2_src[-3] - 2 * pu2_src[-2] + pu2_src[-1]);
+ dp3 = ABS(pu2_src[3 * src_strd - 3] - 2 * pu2_src[3 * src_strd - 2]
+ + pu2_src[3 * src_strd - 1]);
+
+ d0 = dp0 + dq0;
+ d3 = dp3 + dq3;
+
+ dp = dp0 + dp3;
+ dq = dq0 + dq3;
+
+ d = d0 + d3;
+
+ de = 0;
+ dep = 0;
+ deq = 0;
+
+ if(d < beta)
+ {
+ d_sam0 = 0;
+ if((2 * d0 < (beta >> 2))
+ && (ABS(pu2_src[3] - pu2_src[0]) + ABS(pu2_src[-1] - pu2_src[-4])
+ < (beta >> 3))
+ && ABS(pu2_src[0] - pu2_src[-1]) < ((5 * tc + 1) >> 1))
+ {
+ d_sam0 = 1;
+ }
+
+ pu2_src += 3 * src_strd;
+ d_sam3 = 0;
+ if((2 * d3 < (beta >> 2))
+ && (ABS(pu2_src[3] - pu2_src[0]) + ABS(pu2_src[-1] - pu2_src[-4])
+ < (beta >> 3))
+ && ABS(pu2_src[0] - pu2_src[-1]) < ((5 * tc + 1) >> 1))
+ {
+ d_sam3 = 1;
+ }
+ pu2_src -= 3 * src_strd;
+
+ de = (d_sam0 == 1 && d_sam3 == 1) ? 2 : 1;
+ dep = (dp < (beta + (beta >> 1)) >> 3) ? 1 : 0;
+ deq = (dq < (beta + (beta >> 1)) >> 3) ? 1 : 0;
+ if(tc <= 1)
+ {
+ dep = 0;
+ deq = 0;
+ }
+ }
+
+ if(de != 0)
+ {
+ for(row = 0; row < 4; row++)
+ {
+ tmp_p0 = pu2_src[-1];
+ tmp_p1 = pu2_src[-2];
+ tmp_p2 = pu2_src[-3];
+
+ tmp_q0 = pu2_src[0];
+ tmp_q1 = pu2_src[1];
+ tmp_q2 = pu2_src[2];
+
+ if(de == 2)
+ {
+ tmp_q0 = CLIP3((pu2_src[2] + 2 * pu2_src[1] +
+ 2 * pu2_src[0] + 2 * pu2_src[-1] +
+ pu2_src[-2] + 4) >> 3,
+ pu2_src[0] - 2 * tc,
+ pu2_src[0] + 2 * tc);
+
+ tmp_q1 = CLIP3((pu2_src[2] + pu2_src[1] + pu2_src[0] +
+ pu2_src[-1] + 2) >> 2,
+ pu2_src[1] - 2 * tc,
+ pu2_src[1] + 2 * tc);
+
+ tmp_q2 = CLIP3((2 * pu2_src[3] + 3 * pu2_src[2] +
+ pu2_src[1] + pu2_src[0] +
+ pu2_src[-1] + 4) >> 3,
+ pu2_src[2] - 2 * tc,
+ pu2_src[2] + 2 * tc);
+
+ tmp_p0 = CLIP3((pu2_src[1] + 2 * pu2_src[0] +
+ 2 * pu2_src[-1] + 2 * pu2_src[-2] +
+ pu2_src[-3] + 4) >> 3,
+ pu2_src[-1] - 2 * tc,
+ pu2_src[-1] + 2 * tc);
+
+ tmp_p1 = CLIP3((pu2_src[0] + pu2_src[-1] +
+ pu2_src[-2] + pu2_src[-3] + 2) >> 2,
+ pu2_src[-2] - 2 * tc,
+ pu2_src[-2] + 2 * tc);
+
+ tmp_p2 = CLIP3((pu2_src[0] + pu2_src[-1] +
+ pu2_src[-2] + 3 * pu2_src[-3] +
+ 2 * pu2_src[-4] + 4) >> 3,
+ pu2_src[-3] - 2 * tc,
+ pu2_src[-3] + 2 * tc);
+ }
+ else
+ {
+ delta = (9 * (pu2_src[0] - pu2_src[-1]) -
+ 3 * (pu2_src[1] - pu2_src[-2]) + 8) >> 4;
+ if(ABS(delta) < 10 * tc)
+ {
+ delta = CLIP3(delta, -tc, tc);
+
+ tmp_p0 = CLIP3(pu2_src[-1] + delta, 0, ((1 << bit_depth) - 1));
+ tmp_q0 = CLIP3(pu2_src[0] - delta, 0, ((1 << bit_depth) - 1));
+ if(dep == 1)
+ {
+ delta_p = CLIP3((((pu2_src[-3] + pu2_src[-1] + 1) >> 1)
+ - pu2_src[-2] + delta) >> 1,
+ -(tc >> 1),
+ (tc >> 1));
+ tmp_p1 = CLIP3(pu2_src[-2] + delta_p, 0, ((1 << bit_depth) - 1));
+ }
+
+ if(deq == 1)
+ {
+ delta_q = CLIP3((((pu2_src[2] + pu2_src[0] + 1) >> 1)
+ - pu2_src[1] - delta) >> 1,
+ -(tc >> 1),
+ (tc >> 1));
+ tmp_q1 = CLIP3(pu2_src[1] + delta_q, 0, ((1 << bit_depth) - 1));
+ }
+ }
+ }
+
+ if(filter_flag_p != 0)
+ {
+ pu2_src[-3] = tmp_p2;
+ pu2_src[-2] = tmp_p1;
+ pu2_src[-1] = tmp_p0;
+ }
+
+ if(filter_flag_q != 0)
+ {
+ pu2_src[0] = tmp_q0;
+ pu2_src[1] = tmp_q1;
+ pu2_src[2] = tmp_q2;
+ }
+
+ pu2_src += src_strd;
+ }
+ }
+
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*
+* Decision process and filtering for the luma block horizontal edge
+*
+* @par Description:
+* The decision process for the luma block horizontal edge is carried out
+* and an appropriate filter is applied. The boundary filter strength, bs
+* should be greater than 0. The pcm flags and the transquant bypass flags
+* should be taken care of by the calling function.
+*
+* @param[in] pu1_src
+* Pointer to the src sample q(0,0)
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] bs
+* Boundary filter strength of q(0,0)
+*
+* @param[in] quant_param_p
+* quantization parameter of p block
+*
+* @param[in] quant_param_q
+* quantization parameter of p block
+*
+* @param[in] beta_offset_div2
+*
+*
+* @param[in] tc_offset_div2
+*
+*
+* @param[in] filter_flag_p
+* flag whether to filter the p block
+*
+* @param[in] filter_flag_q
+* flag whether to filter the q block
+*
+* @returns
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+void ihevc_deblk_luma_horz(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 bs,
+ WORD32 quant_param_p,
+ WORD32 quant_param_q,
+ WORD32 beta_offset_div2,
+ WORD32 tc_offset_div2,
+ WORD32 filter_flag_p,
+ WORD32 filter_flag_q)
+{
+ WORD32 qp_luma, beta_indx, tc_indx;
+ WORD32 beta, tc;
+ WORD32 dp0, dp3, dq0, dq3, d0, d3, dp, dq, d;
+ WORD32 d_sam0, d_sam3;
+ WORD32 de, dep, deq;
+ WORD32 col;
+ WORD32 tmp_p0, tmp_p1, tmp_p2, tmp_q0, tmp_q1, tmp_q2;
+ WORD32 delta, delta_p, delta_q;
+
+ ASSERT((bs > 0));
+ ASSERT(filter_flag_p || filter_flag_q);
+
+ qp_luma = (quant_param_p + quant_param_q + 1) >> 1;
+ beta_indx = CLIP3(qp_luma + (beta_offset_div2 << 1), 0, 51);
+
+ /* BS based on implementation can take value 3 if it is intra/inter egde */
+ /* based on BS, tc index is calcuated by adding 2 * ( bs - 1) to QP and tc_offset */
+ /* for BS = 1 adding factor is (0*2), BS = 2 or 3 adding factor is (1*2) */
+ /* the above desired functionallity is achieved by doing (2*(bs>>1)) */
+
+ tc_indx = CLIP3(qp_luma + 2 * (bs >> 1) + (tc_offset_div2 << 1), 0, 53);
+
+ beta = gai4_ihevc_beta_table[beta_indx];
+ tc = gai4_ihevc_tc_table[tc_indx];
+ if(0 == tc)
+ {
+ return;
+ }
+
+ dq0 = ABS(pu1_src[2 * src_strd] - 2 * pu1_src[1 * src_strd] +
+ pu1_src[0 * src_strd]);
+
+ dq3 = ABS(pu1_src[3 + 2 * src_strd] - 2 * pu1_src[3 + 1 * src_strd] +
+ pu1_src[3 + 0 * src_strd]);
+
+ dp0 = ABS(pu1_src[-3 * src_strd] - 2 * pu1_src[-2 * src_strd] +
+ pu1_src[-1 * src_strd]);
+
+ dp3 = ABS(pu1_src[3 - 3 * src_strd] - 2 * pu1_src[3 - 2 * src_strd] +
+ pu1_src[3 - 1 * src_strd]);
+
+ d0 = dp0 + dq0;
+ d3 = dp3 + dq3;
+
+ dp = dp0 + dp3;
+ dq = dq0 + dq3;
+
+ d = d0 + d3;
+
+ de = 0;
+ dep = 0;
+ deq = 0;
+
+ if(d < beta)
+ {
+ d_sam0 = 0;
+ if((2 * d0 < (beta >> 2))
+ && (ABS(pu1_src[3 * src_strd] - pu1_src[0 * src_strd]) +
+ ABS(pu1_src[-1 * src_strd] - pu1_src[-4 * src_strd])
+ < (beta >> 3))
+ && ABS(pu1_src[0 * src_strd] - pu1_src[-1 * src_strd])
+ < ((5 * tc + 1) >> 1))
+ {
+ d_sam0 = 1;
+ }
+
+ pu1_src += 3;
+ d_sam3 = 0;
+ if((2 * d3 < (beta >> 2))
+ && (ABS(pu1_src[3 * src_strd] - pu1_src[0 * src_strd]) +
+ ABS(pu1_src[-1 * src_strd] - pu1_src[-4 * src_strd])
+ < (beta >> 3))
+ && ABS(pu1_src[0 * src_strd] - pu1_src[-1 * src_strd])
+ < ((5 * tc + 1) >> 1))
+ {
+ d_sam3 = 1;
+ }
+ pu1_src -= 3;
+
+ de = (d_sam0 == 1 && d_sam3 == 1) ? 2 : 1;
+ dep = (dp < ((beta + (beta >> 1)) >> 3)) ? 1 : 0;
+ deq = (dq < ((beta + (beta >> 1)) >> 3)) ? 1 : 0;
+ if(tc <= 1)
+ {
+ dep = 0;
+ deq = 0;
+ }
+ }
+
+ if(de != 0)
+ {
+ for(col = 0; col < 4; col++)
+ {
+ tmp_p0 = pu1_src[-1 * src_strd];
+ tmp_p1 = pu1_src[-2 * src_strd];
+ tmp_p2 = pu1_src[-3 * src_strd];
+
+ tmp_q0 = pu1_src[0 * src_strd];
+ tmp_q1 = pu1_src[1 * src_strd];
+ tmp_q2 = pu1_src[2 * src_strd];
+ if(de == 2)
+ {
+ tmp_q0 = CLIP3((pu1_src[2 * src_strd] +
+ 2 * pu1_src[1 * src_strd] +
+ 2 * pu1_src[0 * src_strd] +
+ 2 * pu1_src[-1 * src_strd] +
+ pu1_src[-2 * src_strd] + 4) >> 3,
+ pu1_src[0 * src_strd] - 2 * tc,
+ pu1_src[0 * src_strd] + 2 * tc);
+
+ tmp_q1 = CLIP3((pu1_src[2 * src_strd] +
+ pu1_src[1 * src_strd] +
+ pu1_src[0 * src_strd] +
+ pu1_src[-1 * src_strd] + 2) >> 2,
+ pu1_src[1 * src_strd] - 2 * tc,
+ pu1_src[1 * src_strd] + 2 * tc);
+
+ tmp_q2 = CLIP3((2 * pu1_src[3 * src_strd] +
+ 3 * pu1_src[2 * src_strd] +
+ pu1_src[1 * src_strd] +
+ pu1_src[0 * src_strd] +
+ pu1_src[-1 * src_strd] + 4) >> 3,
+ pu1_src[2 * src_strd] - 2 * tc,
+ pu1_src[2 * src_strd] + 2 * tc);
+
+ tmp_p0 = CLIP3((pu1_src[1 * src_strd] +
+ 2 * pu1_src[0 * src_strd] +
+ 2 * pu1_src[-1 * src_strd] +
+ 2 * pu1_src[-2 * src_strd] +
+ pu1_src[-3 * src_strd] + 4) >> 3,
+ pu1_src[-1 * src_strd] - 2 * tc,
+ pu1_src[-1 * src_strd] + 2 * tc);
+
+ tmp_p1 = CLIP3((pu1_src[0 * src_strd] +
+ pu1_src[-1 * src_strd] +
+ pu1_src[-2 * src_strd] +
+ pu1_src[-3 * src_strd] + 2) >> 2,
+ pu1_src[-2 * src_strd] - 2 * tc,
+ pu1_src[-2 * src_strd] + 2 * tc);
+
+ tmp_p2 = CLIP3((pu1_src[0 * src_strd] +
+ pu1_src[-1 * src_strd] +
+ pu1_src[-2 * src_strd] +
+ 3 * pu1_src[-3 * src_strd] +
+ 2 * pu1_src[-4 * src_strd] + 4) >> 3,
+ pu1_src[-3 * src_strd] - 2 * tc,
+ pu1_src[-3 * src_strd] + 2 * tc);
+ }
+ else
+ {
+ delta = (9 * (pu1_src[0 * src_strd] - pu1_src[-1 * src_strd]) -
+ 3 * (pu1_src[1 * src_strd] - pu1_src[-2 * src_strd]) +
+ 8) >> 4;
+ if(ABS(delta) < 10 * tc)
+ {
+ delta = CLIP3(delta, -tc, tc);
+
+ tmp_p0 = CLIP_U8(pu1_src[-1 * src_strd] + delta);
+ tmp_q0 = CLIP_U8(pu1_src[0 * src_strd] - delta);
+
+ if(dep == 1)
+ {
+ delta_p = CLIP3((((pu1_src[-3 * src_strd] +
+ pu1_src[-1 * src_strd] + 1) >> 1) -
+ pu1_src[-2 * src_strd] + delta) >> 1,
+ -(tc >> 1),
+ (tc >> 1));
+ tmp_p1 = CLIP_U8(pu1_src[-2 * src_strd] + delta_p);
+ }
+
+ if(deq == 1)
+ {
+ delta_q = CLIP3((((pu1_src[2 * src_strd] +
+ pu1_src[0 * src_strd] + 1) >> 1) -
+ pu1_src[1 * src_strd] - delta) >> 1,
+ -(tc >> 1),
+ (tc >> 1));
+ tmp_q1 = CLIP_U8(pu1_src[1 * src_strd] + delta_q);
+ }
+ }
+ }
+
+ if(filter_flag_p != 0)
+ {
+ pu1_src[-3 * src_strd] = tmp_p2;
+ pu1_src[-2 * src_strd] = tmp_p1;
+ pu1_src[-1 * src_strd] = tmp_p0;
+ }
+
+ if(filter_flag_q != 0)
+ {
+ pu1_src[0 * src_strd] = tmp_q0;
+ pu1_src[1 * src_strd] = tmp_q1;
+ pu1_src[2 * src_strd] = tmp_q2;
+ }
+
+ pu1_src += 1;
+ }
+ }
+
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*
+* Decision process and filtering for the luma block horizontal edge for high bit depth
+*
+* @par Description:
+* The decision process for the luma block horizontal edge is carried out
+* and an appropriate filter is applied. The boundary filter strength, bs
+* should be greater than 0. The pcm flags and the transquant bypass flags
+* should be taken care of by the calling function.
+*
+* @param[in] pu1_src
+* Pointer to the src sample q(0,0)
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] bs
+* Boundary filter strength of q(0,0)
+*
+* @param[in] quant_param_p
+* quantization parameter of p block
+*
+* @param[in] quant_param_q
+* quantization parameter of p block
+*
+* @param[in] beta_offset_div2
+*
+*
+* @param[in] tc_offset_div2
+*
+*
+* @param[in] filter_flag_p
+* flag whether to filter the p block
+*
+* @param[in] filter_flag_q
+* flag whether to filter the q block
+*
+* @returns
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+void ihevc_hbd_deblk_luma_horz(UWORD16 *pu2_src,
+ WORD32 src_strd,
+ WORD32 bs,
+ WORD32 quant_param_p,
+ WORD32 quant_param_q,
+ WORD32 beta_offset_div2,
+ WORD32 tc_offset_div2,
+ WORD32 filter_flag_p,
+ WORD32 filter_flag_q,
+ UWORD8 bit_depth)
+{
+ WORD32 qp_luma, beta_indx, tc_indx;
+ WORD32 beta, tc;
+ WORD32 dp0, dp3, dq0, dq3, d0, d3, dp, dq, d;
+ WORD32 d_sam0, d_sam3;
+ WORD32 de, dep, deq;
+ WORD32 col;
+ WORD32 tmp_p0, tmp_p1, tmp_p2, tmp_q0, tmp_q1, tmp_q2;
+ WORD32 delta, delta_p, delta_q;
+
+ ASSERT((bs > 0));
+ ASSERT(filter_flag_p || filter_flag_q);
+
+ qp_luma = (quant_param_p + quant_param_q + 1) >> 1;
+ beta_indx = CLIP3(qp_luma + (beta_offset_div2 << 1), 0, 51);
+
+ /* BS based on implementation can take value 3 if it is intra/inter egde */
+ /* based on BS, tc index is calcuated by adding 2 * ( bs - 1) to QP and tc_offset */
+ /* for BS = 1 adding factor is (0*2), BS = 2 or 3 adding factor is (1*2) */
+ /* the above desired functionallity is achieved by doing (2*(bs>>1)) */
+
+ tc_indx = CLIP3(qp_luma + 2 * (bs >> 1) + (tc_offset_div2 << 1), 0, 53);
+
+ beta = gai4_ihevc_beta_table[beta_indx] * (1 << (bit_depth - 8));
+ tc = gai4_ihevc_tc_table[tc_indx] * (1 << (bit_depth - 8));
+ if(0 == tc)
+ {
+ return;
+ }
+
+ dq0 = ABS(pu2_src[2 * src_strd] - 2 * pu2_src[1 * src_strd] +
+ pu2_src[0 * src_strd]);
+
+ dq3 = ABS(pu2_src[3 + 2 * src_strd] - 2 * pu2_src[3 + 1 * src_strd] +
+ pu2_src[3 + 0 * src_strd]);
+
+ dp0 = ABS(pu2_src[-3 * src_strd] - 2 * pu2_src[-2 * src_strd] +
+ pu2_src[-1 * src_strd]);
+
+ dp3 = ABS(pu2_src[3 - 3 * src_strd] - 2 * pu2_src[3 - 2 * src_strd] +
+ pu2_src[3 - 1 * src_strd]);
+
+ d0 = dp0 + dq0;
+ d3 = dp3 + dq3;
+
+ dp = dp0 + dp3;
+ dq = dq0 + dq3;
+
+ d = d0 + d3;
+
+ de = 0;
+ dep = 0;
+ deq = 0;
+
+ if(d < beta)
+ {
+ d_sam0 = 0;
+ if((2 * d0 < (beta >> 2))
+ && (ABS(pu2_src[3 * src_strd] - pu2_src[0 * src_strd]) +
+ ABS(pu2_src[-1 * src_strd] - pu2_src[-4 * src_strd])
+ < (beta >> 3))
+ && ABS(pu2_src[0 * src_strd] - pu2_src[-1 * src_strd])
+ < ((5 * tc + 1) >> 1))
+ {
+ d_sam0 = 1;
+ }
+
+ pu2_src += 3;
+ d_sam3 = 0;
+ if((2 * d3 < (beta >> 2))
+ && (ABS(pu2_src[3 * src_strd] - pu2_src[0 * src_strd]) +
+ ABS(pu2_src[-1 * src_strd] - pu2_src[-4 * src_strd])
+ < (beta >> 3))
+ && ABS(pu2_src[0 * src_strd] - pu2_src[-1 * src_strd])
+ < ((5 * tc + 1) >> 1))
+ {
+ d_sam3 = 1;
+ }
+ pu2_src -= 3;
+
+ de = (d_sam0 == 1 && d_sam3 == 1) ? 2 : 1;
+ dep = (dp < ((beta + (beta >> 1)) >> 3)) ? 1 : 0;
+ deq = (dq < ((beta + (beta >> 1)) >> 3)) ? 1 : 0;
+ if(tc <= 1)
+ {
+ dep = 0;
+ deq = 0;
+ }
+ }
+
+ if(de != 0)
+ {
+ for(col = 0; col < 4; col++)
+ {
+ tmp_p0 = pu2_src[-1 * src_strd];
+ tmp_p1 = pu2_src[-2 * src_strd];
+ tmp_p2 = pu2_src[-3 * src_strd];
+
+ tmp_q0 = pu2_src[0 * src_strd];
+ tmp_q1 = pu2_src[1 * src_strd];
+ tmp_q2 = pu2_src[2 * src_strd];
+ if(de == 2)
+ {
+ tmp_q0 = CLIP3((pu2_src[2 * src_strd] +
+ 2 * pu2_src[1 * src_strd] +
+ 2 * pu2_src[0 * src_strd] +
+ 2 * pu2_src[-1 * src_strd] +
+ pu2_src[-2 * src_strd] + 4) >> 3,
+ pu2_src[0 * src_strd] - 2 * tc,
+ pu2_src[0 * src_strd] + 2 * tc);
+
+ tmp_q1 = CLIP3((pu2_src[2 * src_strd] +
+ pu2_src[1 * src_strd] +
+ pu2_src[0 * src_strd] +
+ pu2_src[-1 * src_strd] + 2) >> 2,
+ pu2_src[1 * src_strd] - 2 * tc,
+ pu2_src[1 * src_strd] + 2 * tc);
+
+ tmp_q2 = CLIP3((2 * pu2_src[3 * src_strd] +
+ 3 * pu2_src[2 * src_strd] +
+ pu2_src[1 * src_strd] +
+ pu2_src[0 * src_strd] +
+ pu2_src[-1 * src_strd] + 4) >> 3,
+ pu2_src[2 * src_strd] - 2 * tc,
+ pu2_src[2 * src_strd] + 2 * tc);
+
+ tmp_p0 = CLIP3((pu2_src[1 * src_strd] +
+ 2 * pu2_src[0 * src_strd] +
+ 2 * pu2_src[-1 * src_strd] +
+ 2 * pu2_src[-2 * src_strd] +
+ pu2_src[-3 * src_strd] + 4) >> 3,
+ pu2_src[-1 * src_strd] - 2 * tc,
+ pu2_src[-1 * src_strd] + 2 * tc);
+
+ tmp_p1 = CLIP3((pu2_src[0 * src_strd] +
+ pu2_src[-1 * src_strd] +
+ pu2_src[-2 * src_strd] +
+ pu2_src[-3 * src_strd] + 2) >> 2,
+ pu2_src[-2 * src_strd] - 2 * tc,
+ pu2_src[-2 * src_strd] + 2 * tc);
+
+ tmp_p2 = CLIP3((pu2_src[0 * src_strd] +
+ pu2_src[-1 * src_strd] +
+ pu2_src[-2 * src_strd] +
+ 3 * pu2_src[-3 * src_strd] +
+ 2 * pu2_src[-4 * src_strd] + 4) >> 3,
+ pu2_src[-3 * src_strd] - 2 * tc,
+ pu2_src[-3 * src_strd] + 2 * tc);
+ }
+ else
+ {
+ delta = (9 * (pu2_src[0 * src_strd] - pu2_src[-1 * src_strd]) -
+ 3 * (pu2_src[1 * src_strd] - pu2_src[-2 * src_strd]) +
+ 8) >> 4;
+ if(ABS(delta) < 10 * tc)
+ {
+ delta = CLIP3(delta, -tc, tc);
+ tmp_p0 = CLIP3(pu2_src[-1 * src_strd] + delta, 0, ((1 << bit_depth) - 1));
+ tmp_q0 = CLIP3(pu2_src[0 * src_strd] - delta, 0, ((1 << bit_depth) - 1));
+ if(dep == 1)
+ {
+ delta_p = CLIP3((((pu2_src[-3 * src_strd] +
+ pu2_src[-1 * src_strd] + 1) >> 1) -
+ pu2_src[-2 * src_strd] + delta) >> 1,
+ -(tc >> 1),
+ (tc >> 1));
+ tmp_p1 = CLIP3(pu2_src[-2 * src_strd] + delta_p, 0, ((1 << bit_depth) - 1));
+ }
+
+ if(deq == 1)
+ {
+ delta_q = CLIP3((((pu2_src[2 * src_strd] +
+ pu2_src[0 * src_strd] + 1) >> 1) -
+ pu2_src[1 * src_strd] - delta) >> 1,
+ -(tc >> 1),
+ (tc >> 1));
+ tmp_q1 = CLIP3(pu2_src[1 * src_strd] + delta_q, 0, ((1 << bit_depth) - 1));
+ }
+ }
+ }
+
+ if(filter_flag_p != 0)
+ {
+ pu2_src[-3 * src_strd] = tmp_p2;
+ pu2_src[-2 * src_strd] = tmp_p1;
+ pu2_src[-1 * src_strd] = tmp_p0;
+ }
+
+ if(filter_flag_q != 0)
+ {
+ pu2_src[0 * src_strd] = tmp_q0;
+ pu2_src[1 * src_strd] = tmp_q1;
+ pu2_src[2 * src_strd] = tmp_q2;
+ }
+
+ pu2_src += 1;
+ }
+ }
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+* Filtering for the chroma block vertical edge.
+*
+* @par Description:
+* Filter for chroma vertical edge. The boundary filter strength, bs
+* should be greater than 1. The pcm flags and the transquant bypass flags
+* should be taken care of by the calling function.
+*
+* @param[in] pu1_src
+* Pointer to the src sample q(0,0)
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] bs
+* Boundary filter strength of q(0,0)
+*
+* @param[in] quant_param_p
+* quantization parameter of p block
+*
+* @param[in] quant_param_q
+* quantization parameter of p block
+*
+* @param[in] beta_offset_div2
+*
+*
+* @param[in] tc_offset_div2
+*
+*
+* @param[in] filter_flag_p
+* flag whether to filter the p block
+*
+* @param[in] filter_flag_q
+* flag whether to filter the q block
+*
+* @returns
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+void ihevc_deblk_chroma_vert(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 quant_param_p,
+ WORD32 quant_param_q,
+ WORD32 qp_offset_u,
+ WORD32 qp_offset_v,
+ WORD32 tc_offset_div2,
+ WORD32 filter_flag_p,
+ WORD32 filter_flag_q)
+{
+ WORD32 qp_indx_u, qp_chroma_u;
+ WORD32 qp_indx_v, qp_chroma_v;
+ WORD32 tc_indx_u, tc_u;
+ WORD32 tc_indx_v, tc_v;
+ WORD32 delta_u, tmp_p0_u, tmp_q0_u;
+ WORD32 delta_v, tmp_p0_v, tmp_q0_v;
+ WORD32 row;
+
+ ASSERT(filter_flag_p || filter_flag_q);
+
+ /* chroma processing is done only if BS is 2 */
+ /* this function is assumed to be called only if BS is 2 */
+ qp_indx_u = qp_offset_u + ((quant_param_p + quant_param_q + 1) >> 1);
+ qp_chroma_u = qp_indx_u < 0 ? qp_indx_u : (qp_indx_u > 57 ? qp_indx_u - 6 : gai4_ihevc_qp_table[qp_indx_u]);
+
+ qp_indx_v = qp_offset_v + ((quant_param_p + quant_param_q + 1) >> 1);
+ qp_chroma_v = qp_indx_v < 0 ? qp_indx_v : (qp_indx_v > 57 ? qp_indx_v - 6 : gai4_ihevc_qp_table[qp_indx_v]);
+
+ tc_indx_u = CLIP3(qp_chroma_u + 2 + (tc_offset_div2 << 1), 0, 53);
+ tc_u = gai4_ihevc_tc_table[tc_indx_u];
+
+ tc_indx_v = CLIP3(qp_chroma_v + 2 + (tc_offset_div2 << 1), 0, 53);
+ tc_v = gai4_ihevc_tc_table[tc_indx_v];
+
+ if(0 == tc_u && 0 == tc_v)
+ {
+ return;
+ }
+
+ for(row = 0; row < 4; row++)
+ {
+ delta_u = CLIP3((((pu1_src[0] - pu1_src[-2]) << 2) +
+ pu1_src[-4] - pu1_src[2] + 4) >> 3,
+ -tc_u, tc_u);
+
+ tmp_p0_u = CLIP_U8(pu1_src[-2] + delta_u);
+ tmp_q0_u = CLIP_U8(pu1_src[0] - delta_u);
+
+ delta_v = CLIP3((((pu1_src[1] - pu1_src[-1]) << 2) +
+ pu1_src[-3] - pu1_src[3] + 4) >> 3,
+ -tc_v, tc_v);
+
+ tmp_p0_v = CLIP_U8(pu1_src[-1] + delta_v);
+ tmp_q0_v = CLIP_U8(pu1_src[1] - delta_v);
+
+ if(filter_flag_p != 0)
+ {
+ pu1_src[-2] = tmp_p0_u;
+ pu1_src[-1] = tmp_p0_v;
+ }
+
+ if(filter_flag_q != 0)
+ {
+ pu1_src[0] = tmp_q0_u;
+ pu1_src[1] = tmp_q0_v;
+ }
+
+ pu1_src += src_strd;
+ }
+
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+* Filtering for the chroma block vertical edge.
+*
+* @par Description:
+* Filter for chroma vertical edge. The boundary filter strength, bs
+* should be greater than 1. The pcm flags and the transquant bypass flags
+* should be taken care of by the calling function.
+*
+* @param[in] pu2_src
+* Pointer to the src sample q(0,0)
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] bs
+* Boundary filter strength of q(0,0)
+*
+* @param[in] quant_param_p
+* quantization parameter of p block
+*
+* @param[in] quant_param_q
+* quantization parameter of p block
+*
+* @param[in] beta_offset_div2
+*
+*
+* @param[in] tc_offset_div2
+*
+*
+* @param[in] filter_flag_p
+* flag whether to filter the p block
+*
+* @param[in] filter_flag_q
+* flag whether to filter the q block
+*
+* @returns
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+void ihevc_hbd_deblk_chroma_vert(UWORD16 *pu2_src,
+ WORD32 src_strd,
+ WORD32 quant_param_p,
+ WORD32 quant_param_q,
+ WORD32 qp_offset_u,
+ WORD32 qp_offset_v,
+ WORD32 tc_offset_div2,
+ WORD32 filter_flag_p,
+ WORD32 filter_flag_q,
+ UWORD8 bit_depth)
+{
+ WORD32 qp_indx_u, qp_chroma_u;
+ WORD32 qp_indx_v, qp_chroma_v;
+ WORD32 tc_indx_u, tc_u;
+ WORD32 tc_indx_v, tc_v;
+ WORD32 delta_u, tmp_p0_u, tmp_q0_u;
+ WORD32 delta_v, tmp_p0_v, tmp_q0_v;
+ WORD32 row;
+
+ ASSERT(filter_flag_p || filter_flag_q);
+
+ /* chroma processing is done only if BS is 2 */
+ /* this function is assumed to be called only if BS is 2 */
+ qp_indx_u = qp_offset_u + ((quant_param_p + quant_param_q + 1) >> 1);
+ qp_chroma_u = qp_indx_u < 0 ? qp_indx_u : (qp_indx_u > 57 ? qp_indx_u - 6 : gai4_ihevc_qp_table[qp_indx_u]);
+
+ qp_indx_v = qp_offset_v + ((quant_param_p + quant_param_q + 1) >> 1);
+ qp_chroma_v = qp_indx_v < 0 ? qp_indx_v : (qp_indx_v > 57 ? qp_indx_v - 6 : gai4_ihevc_qp_table[qp_indx_v]);
+
+ tc_indx_u = CLIP3(qp_chroma_u + 2 + (tc_offset_div2 << 1), 0, 53);
+ tc_u = gai4_ihevc_tc_table[tc_indx_u] * (1 << (bit_depth - 8));
+
+ tc_indx_v = CLIP3(qp_chroma_v + 2 + (tc_offset_div2 << 1), 0, 53);
+ tc_v = gai4_ihevc_tc_table[tc_indx_v] * (1 << (bit_depth - 8));
+
+ if(0 == tc_u && 0 == tc_v)
+ {
+ return;
+ }
+
+ for(row = 0; row < 4; row++)
+ {
+ delta_u = CLIP3((((pu2_src[0] - pu2_src[-2]) << 2) +
+ pu2_src[-4] - pu2_src[2] + 4) >> 3,
+ -tc_u, tc_u);
+ tmp_p0_u = CLIP3(pu2_src[-2] + delta_u, 0, ((1 << bit_depth) - 1));
+ tmp_q0_u = CLIP3(pu2_src[0] - delta_u, 0, ((1 << bit_depth) - 1));
+
+ delta_v = CLIP3((((pu2_src[1] - pu2_src[-1]) << 2) +
+ pu2_src[-3] - pu2_src[3] + 4) >> 3,
+ -tc_v, tc_v);
+ tmp_p0_v = CLIP3(pu2_src[-1] + delta_v, 0, ((1 << bit_depth) - 1));
+ tmp_q0_v = CLIP3(pu2_src[1] - delta_v, 0, ((1 << bit_depth) - 1));
+ if(filter_flag_p != 0)
+ {
+ pu2_src[-2] = tmp_p0_u;
+ pu2_src[-1] = tmp_p0_v;
+ }
+
+ if(filter_flag_q != 0)
+ {
+ pu2_src[0] = tmp_q0_u;
+ pu2_src[1] = tmp_q0_v;
+ }
+
+ pu2_src += src_strd;
+ }
+
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+* Filtering for the chroma block horizontal edge.
+*
+* @par Description:
+* Filter for chroma horizontal edge. The boundary filter strength, bs
+* should be greater than 1. The pcm flags and the transquant bypass flags
+* should be taken care of by the calling function.
+*
+* @param[in] pu1_src
+* Pointer to the src sample q(0,0)
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] bs
+* Boundary filter strength of q(0,0)
+*
+* @param[in] quant_param_p
+* quantization parameter of p block
+*
+* @param[in] quant_param_q
+* quantization parameter of p block
+*
+* @param[in] beta_offset_div2
+*
+*
+* @param[in] tc_offset_div2
+*
+*
+* @param[in] filter_flag_p
+* flag whether to filter the p block
+*
+* @param[in] filter_flag_q
+* flag whether to filter the q block
+*
+* @returns
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+void ihevc_deblk_chroma_horz(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 quant_param_p,
+ WORD32 quant_param_q,
+ WORD32 qp_offset_u,
+ WORD32 qp_offset_v,
+ WORD32 tc_offset_div2,
+ WORD32 filter_flag_p,
+ WORD32 filter_flag_q)
+{
+ WORD32 qp_indx_u, qp_chroma_u;
+ WORD32 qp_indx_v, qp_chroma_v;
+ WORD32 tc_indx_u, tc_u;
+ WORD32 tc_indx_v, tc_v;
+ WORD32 tc;
+
+ WORD32 delta, tmp_p0, tmp_q0;
+ WORD32 col;
+
+ ASSERT(filter_flag_p || filter_flag_q);
+
+ /* chroma processing is done only if BS is 2 */
+ /* this function is assumed to be called only if BS is 2 */
+ qp_indx_u = qp_offset_u + ((quant_param_p + quant_param_q + 1) >> 1);
+ qp_chroma_u = qp_indx_u < 0 ? qp_indx_u : (qp_indx_u > 57 ? qp_indx_u - 6 : gai4_ihevc_qp_table[qp_indx_u]);
+
+ qp_indx_v = qp_offset_v + ((quant_param_p + quant_param_q + 1) >> 1);
+ qp_chroma_v = qp_indx_v < 0 ? qp_indx_v : (qp_indx_v > 57 ? qp_indx_v - 6 : gai4_ihevc_qp_table[qp_indx_v]);
+
+ tc_indx_u = CLIP3(qp_chroma_u + 2 + (tc_offset_div2 << 1), 0, 53);
+ tc_u = gai4_ihevc_tc_table[tc_indx_u];
+
+ tc_indx_v = CLIP3(qp_chroma_v + 2 + (tc_offset_div2 << 1), 0, 53);
+ tc_v = gai4_ihevc_tc_table[tc_indx_v];
+
+ if(0 == tc_u && 0 == tc_v)
+ {
+ return;
+ }
+
+ for(col = 0; col < 8; col++)
+ {
+ tc = (col & 1) ? tc_v : tc_u;
+ delta = CLIP3((((pu1_src[0 * src_strd] -
+ pu1_src[-1 * src_strd]) << 2) +
+ pu1_src[-2 * src_strd] -
+ pu1_src[1 * src_strd] + 4) >> 3,
+ -tc, tc);
+
+ tmp_p0 = CLIP_U8(pu1_src[-1 * src_strd] + delta);
+ tmp_q0 = CLIP_U8(pu1_src[0 * src_strd] - delta);
+
+ if(filter_flag_p != 0)
+ {
+ pu1_src[-1 * src_strd] = tmp_p0;
+ }
+
+ if(filter_flag_q != 0)
+ {
+ pu1_src[0 * src_strd] = tmp_q0;
+ }
+
+ pu1_src += 1;
+ }
+
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+* Filtering for the chroma block horizontal edge.
+*
+* @par Description:
+* Filter for chroma horizontal edge. The boundary filter strength, bs
+* should be greater than 1. The pcm flags and the transquant bypass flags
+* should be taken care of by the calling function.
+*
+* @param[in] pu2_src
+* Pointer to the src sample q(0,0)
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] bs
+* Boundary filter strength of q(0,0)
+*
+* @param[in] quant_param_p
+* quantization parameter of p block
+*
+* @param[in] quant_param_q
+* quantization parameter of p block
+*
+* @param[in] beta_offset_div2
+*
+*
+* @param[in] tc_offset_div2
+*
+*
+* @param[in] filter_flag_p
+* flag whether to filter the p block
+*
+* @param[in] filter_flag_q
+* flag whether to filter the q block
+*
+* @returns
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+void ihevc_hbd_deblk_chroma_horz(UWORD16 *pu2_src,
+ WORD32 src_strd,
+ WORD32 quant_param_p,
+ WORD32 quant_param_q,
+ WORD32 qp_offset_u,
+ WORD32 qp_offset_v,
+ WORD32 tc_offset_div2,
+ WORD32 filter_flag_p,
+ WORD32 filter_flag_q,
+ UWORD8 bit_depth)
+{
+ WORD32 qp_indx_u, qp_chroma_u;
+ WORD32 qp_indx_v, qp_chroma_v;
+ WORD32 tc_indx_u, tc_u;
+ WORD32 tc_indx_v, tc_v;
+ WORD32 tc;
+
+ WORD32 delta, tmp_p0, tmp_q0;
+ WORD32 col;
+
+ ASSERT(filter_flag_p || filter_flag_q);
+
+ /* chroma processing is done only if BS is 2 */
+ /* this function is assumed to be called only if BS is 2 */
+ qp_indx_u = qp_offset_u + ((quant_param_p + quant_param_q + 1) >> 1);
+ qp_chroma_u = qp_indx_u < 0 ? qp_indx_u : (qp_indx_u > 57 ? qp_indx_u - 6 : gai4_ihevc_qp_table[qp_indx_u]);
+
+ qp_indx_v = qp_offset_v + ((quant_param_p + quant_param_q + 1) >> 1);
+ qp_chroma_v = qp_indx_v < 0 ? qp_indx_v : (qp_indx_v > 57 ? qp_indx_v - 6 : gai4_ihevc_qp_table[qp_indx_v]);
+
+ tc_indx_u = CLIP3(qp_chroma_u + 2 + (tc_offset_div2 << 1), 0, 53);
+ tc_u = gai4_ihevc_tc_table[tc_indx_u] * (1 << (bit_depth - 8));
+
+ tc_indx_v = CLIP3(qp_chroma_v + 2 + (tc_offset_div2 << 1), 0, 53);
+ tc_v = gai4_ihevc_tc_table[tc_indx_v] * (1 << (bit_depth - 8));
+
+ if(0 == tc_u && 0 == tc_v)
+ {
+ return;
+ }
+
+ for(col = 0; col < 8; col++)
+ {
+ tc = (col & 1) ? tc_v : tc_u;
+ delta = CLIP3((((pu2_src[0 * src_strd] -
+ pu2_src[-1 * src_strd]) << 2) +
+ pu2_src[-2 * src_strd] -
+ pu2_src[1 * src_strd] + 4) >> 3,
+ -tc, tc);
+ tmp_p0 = CLIP3(pu2_src[-1 * src_strd] + delta, 0, ((1 << bit_depth) - 1));
+ tmp_q0 = CLIP3(pu2_src[0 * src_strd] - delta, 0, ((1 << bit_depth) - 1));
+
+ if(filter_flag_p != 0)
+ {
+ pu2_src[-1 * src_strd] = tmp_p0;
+ }
+
+ if(filter_flag_q != 0)
+ {
+ pu2_src[0 * src_strd] = tmp_q0;
+ }
+
+ pu2_src += 1;
+ }
+
+}