diff options
author | Hamsalekha S <hamsalekha.s@ittiam.com> | 2015-03-13 21:24:58 +0530 |
---|---|---|
committer | Hamsalekha S <hamsalekha.s@ittiam.com> | 2015-04-02 15:59:02 +0530 |
commit | 8d3d303c7942ced6a987a52db8977d768dc3605f (patch) | |
tree | cc806c96794356996b13ba9970941d0aed74a97e /encoder/ih264e_core_coding.c | |
parent | 3956d913d37327dcb340f836e604b04bd478b158 (diff) | |
download | android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.tar.gz android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.tar.bz2 android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.zip |
Initial version
Change-Id: I7efe9a589cd24edf86e8d086b40c27cbbf8b4017
Diffstat (limited to 'encoder/ih264e_core_coding.c')
-rwxr-xr-x | encoder/ih264e_core_coding.c | 2365 |
1 file changed, 2365 insertions, 0 deletions
diff --git a/encoder/ih264e_core_coding.c b/encoder/ih264e_core_coding.c new file mode 100755 index 0000000..5ba18de --- /dev/null +++ b/encoder/ih264e_core_coding.c @@ -0,0 +1,2365 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** + ******************************************************************************* + * @file + * ih264e_core_coding.c + * + * @brief + * This file contains routines that perform luma and chroma core coding for + * intra macroblocks + * + * @author + * ittiam + * + * @par List of Functions: + * - ih264e_pack_l_mb_i16() + * - ih264e_pack_c_mb_i8() + * - ih264e_code_luma_intra_macroblock_16x16() + * - ih264e_code_luma_intra_macroblock_4x4() + * - ih264e_code_chroma_intra_macroblock_8x8() + * + * @remarks + * None + * + ******************************************************************************* + */ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include <stdio.h> +#include <string.h> +#include <assert.h> + +/* User include files */ +#include "ih264e_config.h" +#include "ih264_typedefs.h" +#include "ih264_platform_macros.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_defs.h" +#include "ih264e_defs.h" +#include "ih264_trans_data.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_structs.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_structs.h" +#include "ih264e_globals.h" +#include "ih264e_core_coding.h" +#include "ih264e_mc.h" + + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + +/** 
+******************************************************************************* +* +* @brief +* This function performs the DCT transform, then the Hadamard transform +* and quantization for a macroblock when the mb mode is intra 16x16 mode +* +* @par Description: +* First cf4 is done on all 16 4x4 blocks of the 16x16 input block. +* Then hadamard transform is done on the DC coefficients +* Quantization is then performed on the 16x16 block, 4x4 wise +* +* @param[in] ps_codec +* Codec context; supplies the pf_resi_trans_quant_4x4 and +* pf_hadamard_quant_4x4 function pointers used here +* +* @param[in] pu1_src +* Pointer to source sub-block +* +* @param[in] pu1_pred +* Pointer to prediction sub-block +* +* @param[out] pi2_out +* Pointer to residual sub-block +* The output will be in linear format +* The first 16 contiguous locations will contain the values of the DC block +* After DC block and a stride 1st AC block will follow +* After one more stride next AC block will follow +* The blocks will be in raster scan order +* +* @param[in] src_strd +* Source stride +* +* @param[in] pred_strd +* Prediction stride +* +* @param[in] dst_strd +* Destination stride +* +* @param[in] pu2_scale_matrix +* The quantization matrix for 4x4 transform +* +* @param[in] pu2_threshold_matrix +* Threshold matrix +* +* @param[in] u4_qbits +* 15+QP/6 +* +* @param[in] u4_round_factor +* Round factor for quant +* +* @param[out] pu1_nnz +* Memory to store the non-zeros after transform +* The first byte will be the nnz of DC block +* From the next byte the AC nnzs will be stored in raster scan order +* +* @param[in] u4_dc_flag +* Signals if Dc transform is to be done or not +* 1 -> Dc transform will be done +* 0 -> Dc transform will not be done +* +* @remarks +* +******************************************************************************* +*/ +void ih264e_luma_16x16_resi_trans_dctrans_quant(codec_t *ps_codec, + UWORD8 *pu1_src, + UWORD8 *pu1_pred, + WORD16 *pi2_out, + WORD32 src_strd, + WORD32 pred_strd, + WORD32 dst_strd, + const UWORD16 *pu2_scale_matrix, + const UWORD16 *pu2_threshold_matrix, + UWORD32 u4_qbits, + UWORD32 
u4_round_factor, + UWORD8 *pu1_nnz, + UWORD32 u4_dc_flag) + +{ + WORD32 blk_cntr; + WORD32 i4_offsetx, i4_offsety; + UWORD8 *pu1_curr_src, *pu1_curr_pred; + + WORD16 *pi2_dc_str = pi2_out; + + /* Move to the ac addresses */ + pu1_nnz++; + pi2_out += dst_strd; + + for (blk_cntr = 0; blk_cntr < NUM_LUMA4x4_BLOCKS_IN_MB; blk_cntr++) + { + IND2SUB_LUMA_MB(blk_cntr, i4_offsetx, i4_offsety); + + pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd; + pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd; + + ps_codec->pf_resi_trans_quant_4x4(pu1_curr_src, pu1_curr_pred, + pi2_out + blk_cntr * dst_strd, + src_strd, pred_strd, pu2_scale_matrix, + pu2_threshold_matrix, u4_qbits, + u4_round_factor, &pu1_nnz[blk_cntr], + &pi2_dc_str[blk_cntr]); + + } + + if (!u4_dc_flag) + return; + + /* + * In case of i16x16, we need to remove the contribution of dc coeffs into + * nnz of each block. We are doing that in the packing function + */ + + /* Adjust pointers to point to dc values */ + pi2_out -= dst_strd; + pu1_nnz--; + + u4_qbits++; + u4_round_factor <<= 1; + + ps_codec->pf_hadamard_quant_4x4(pi2_dc_str, pi2_out, pu2_scale_matrix, + pu2_threshold_matrix, u4_qbits, + u4_round_factor, &pu1_nnz[0]); +} + +/** +******************************************************************************* +* +* @brief +* This function performs the intra 16x16 inverse transform process for H264 +* it includes inverse Dc transform, inverse quant and then inverse transform +* +* @par Description: +* +* @param[in] pi2_src +* Input data, 16x16 size +* First 16 mem locations will have the Dc coffs in rater scan order in linear fashion +* after a stride 1st AC clock will be present again in raster can order +* Then each AC block of the 16x16 block will follow in raster scan order +* +* @param[in] pu1_pred +* The predicted data, 16x16 size +* Block by block form +* +* @param[in] pu1_out +* Output 16x16 +* In block by block form +* +* @param[in] src_strd +* Source stride +* +* @param[in] 
pred_strd +* input stride for prediction buffer +* +* @param[in] out_strd +* input stride for output buffer +* +* @param[in] pu2_iscale_mat +* Inverse quantization matrix for 4x4 transform +* +* @param[in] pu2_weigh_mat +* weight matrix of 4x4 transform +* +* @param[in] qp_div +* QP/6 +* +* @param[in] pi4_tmp +* Input temporary buffer +* needs to be at least 20 in size +* +* @param[in] u4_cntrl +* Controls the transform path +* The upper 17 bits are used +* the 16th bit will correspond to the DC block +* and 32-17 will correspond to the ac blocks in raster scan order +* bit equaling zero indicates that the entire 4x4 block is zero for DC +* For AC blocks a bit equaling zero will mean that all 15 AC coeffs of the block are zero +* +* @param[in] u4_dc_trans_flag +* Non-zero: the inverse Hadamard transform is first applied to the DC coeffs +* and inverse quant of each 4x4 block starts at index 1 +* Zero: no inverse Hadamard transform, inverse quant starts at index 0 +* +* @param[in] pi4_tmp +* Input temporary buffer +* needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size +* +* @returns +* none +* +* @remarks +* The all zero case must be taken care of outside +* +******************************************************************************* +*/ +void ih264e_luma_16x16_idctrans_iquant_itrans_recon(codec_t *ps_codec, + WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_out, + WORD32 src_strd, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_iscale_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 qp_div, + UWORD32 u4_cntrl, + UWORD32 u4_dc_trans_flag, + WORD32 *pi4_tmp) +{ + /* Start index for inverse quant in a 4x4 block */ + WORD32 iq_start_idx = (u4_dc_trans_flag == 0) ? 
0 : 1; + + /* Cntrl bits for 4x4 transforms + * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path + * u4_dc_cntrl : controls is a 4x4 block is to be processed in dc path + * : dc block must contain only single dc coefficient + * u4_empty_blk_cntrl : control fot 4x4 block with no coeffs, ie no dc and ac + * : ie not (ac or dc) + */ + UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl; + + /* tmp registers for block ids */ + UWORD32 u4_blk_id; + + /* Subscrripts */ + WORD32 i4_offset_x, i4_offset_y; + + UWORD8 *pu1_cur_prd_blk, *pu1_cur_out_blk; + + /* Src and stride for dc coeffs */ + UWORD32 u4_dc_inc; + WORD16 *pi2_dc_src; + + /* + * For intra blocks we need to do inverse dc transform + * In case if intra blocks, its here that we populate the dc bits in cntrl + * as they cannot be populated any earlier + */ + if (u4_dc_trans_flag) + { + UWORD32 cntr, u4_dc_cntrl; + /* Do inv hadamard and place the results at the start of each AC block */ + ps_codec->pf_ihadamard_scaling_4x4(pi2_src, pi2_src, pu2_iscale_mat, + pu2_weigh_mat, qp_div, pi4_tmp); + + /* Update the cntrl flag */ + u4_dc_cntrl = 0; + for (cntr = 0; cntr < DC_COEFF_CNT_LUMA_MB; cntr++) + { + u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr)); + } + /* Mark dc bits as 1 if corresponding ac bit is 0 */ + u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl); + /* Combine both ac and dc bits */ + u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA) + | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_LUMA); + } + + /* Source for dc coeffs + * If the block is intra, we have to read dc values from first row of src + * then stride for each block is 1, other wise its src stride + */ + pi2_dc_src = (iq_start_idx == 0) ? (pi2_src + src_strd) : pi2_src; + u4_dc_inc = (iq_start_idx == 0) ? 
src_strd : 1; + + /* The AC blocks starts from 2nd row */ + pi2_src += src_strd; + + /* Get the block bits */ + u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA); + u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_LUMA) << 16; + u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFFFF0000; + + /* Get first block to process */ + DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id); + while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB) + { + /* Compute address of src blocks */ + WORD32 i4_src_offset = u4_dc_inc * u4_blk_id; + + IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); + + /* Compute address of out and pred blocks */ + pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; + pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; + + /* Do inv dc transform */ + ps_codec->pf_iquant_itrans_recon_4x4_dc(pi2_dc_src + i4_src_offset, + pu1_cur_prd_blk, + pu1_cur_out_blk, pred_strd, + out_strd, pu2_iscale_mat, + pu2_weigh_mat, qp_div, NULL, + iq_start_idx, + pi2_dc_src + i4_src_offset); + /* Get next DC block to process */ + DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id); + } + + /* now process ac/mixed blocks */ + DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id); + while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB) + { + + WORD32 i4_src_offset = src_strd * u4_blk_id; + + IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); + + pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; + pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; + + ps_codec->pf_iquant_itrans_recon_4x4(pi2_src + i4_src_offset, + pu1_cur_prd_blk, pu1_cur_out_blk, + pred_strd, out_strd, + pu2_iscale_mat, pu2_weigh_mat, + qp_div, (WORD16*) pi4_tmp, + iq_start_idx, + pi2_dc_src + u4_blk_id); + + DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id); + } + + /* Now process empty blocks */ + DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id); + while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB) + { + IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, 
i4_offset_y); + + pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; + pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; + + ps_codec->pf_inter_pred_luma_copy(pu1_cur_prd_blk, pu1_cur_out_blk, + pred_strd, out_strd, SIZE_4X4_BLK_HRZ, + SIZE_4X4_BLK_VERT, 0, 0); + + DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id); + } +} + +/** +******************************************************************************* +* +* @brief +* This function performs does the DCT transform then Hadamard transform +* and quantization for a chroma macroblock +* +* @par Description: +* First cf4 is done on all 16 4x4 blocks of the 8x8input block +* Then hadamard transform is done on the DC coefficients +* Quantization is then performed on the 8x8 block, 4x4 wise +* +* @param[in] pu1_src +* Pointer to source sub-block +* The input is in interleaved format for two chroma planes +* +* @param[in] pu1_pred +* Pointer to prediction sub-block +* Prediction is in inter leaved format +* +* @param[in] pi2_out +* Pointer to residual sub-block +* The output will be in linear format +* The first 4 continuous locations will contain the values of DC block for U +* and then next 4 will contain for V. 
+* After DC block and a stride 1st AC block of U plane will follow +* After one more stride next AC block of U plane will follow +* The blocks will be in raster scan order +* +* After all the AC blocks of U plane AC blocks of V plane will follow in exact +* same way +* +* @param[in] src_strd +* Source stride +* +* @param[in] pred_strd +* Prediction stride +* +* @param[in] out_strd +* Destination stride +* +* @param[in] pu2_scale_matrix +* The quantization matrix for 4x4 transform +* +* @param[in] pu2_threshold_matrix +* Threshold matrix +* +* @param[in] u4_qbits +* 15+QP/6 +* +* @param[in] u4_round_factor +* Round factor for quant +* +* @param[out] pu1_nnz_c +* Memory to store the non-zeros after transform +* The first byte will be the nnz of DC block for U plane +* From the next byte the AC nnzs will be stored in raster scan order +* The byte at index 5 will be the nnz of the DC block of V plane +* Then Ac blocks will follow +* +* @remarks +* The DC (2x2 Hadamard) transform is always performed; +* this function does not take a dc flag +* +******************************************************************************* +*/ +void ih264e_chroma_8x8_resi_trans_dctrans_quant(codec_t *ps_codec, + UWORD8 *pu1_src, + UWORD8 *pu1_pred, + WORD16 *pi2_out, + WORD32 src_strd, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_scale_matrix, + const UWORD16 *pu2_threshold_matrix, + UWORD32 u4_qbits, + UWORD32 u4_round_factor, + UWORD8 *pu1_nnz_c) +{ + WORD32 blk_cntr; + WORD32 i4_offsetx, i4_offsety; + UWORD8 *pu1_curr_src, *pu1_curr_pred; + + WORD16 pi2_dc_str[8]; + UWORD8 au1_dcnnz[2]; + + /* Move to the ac addresses */ + pu1_nnz_c++; + pi2_out += out_strd; + + for (blk_cntr = 0; blk_cntr < NUM_CHROMA4x4_BLOCKS_IN_MB; blk_cntr++) + { + IND2SUB_CHROMA_MB(blk_cntr, i4_offsetx, i4_offsety); + + pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd; + pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd; + + /* For chroma, 
v plane nnz is populated from position 5 */ + ps_codec->pf_resi_trans_quant_chroma_4x4( + pu1_curr_src, pu1_curr_pred, + pi2_out + blk_cntr * out_strd, src_strd, pred_strd, + pu2_scale_matrix, pu2_threshold_matrix, u4_qbits, + u4_round_factor, &pu1_nnz_c[blk_cntr + (blk_cntr > 3)], + &pi2_dc_str[blk_cntr]); + } + + /* Adjust pointers to point to dc values */ + pi2_out -= out_strd; + pu1_nnz_c--; + + u4_qbits++; + u4_round_factor <<= 1; + + ps_codec->pf_hadamard_quant_2x2_uv(pi2_dc_str, pi2_out, pu2_scale_matrix, + pu2_threshold_matrix, u4_qbits, + u4_round_factor, au1_dcnnz); + + /* Copy the dc nnzs */ + pu1_nnz_c[0] = au1_dcnnz[0]; + pu1_nnz_c[5] = au1_dcnnz[1]; + +} + +/** +******************************************************************************* +* @brief +* This function performs the inverse transform with process for chroma MB of H264 +* +* @par Description: +* Does inverse DC transform ,inverse quantization inverse transform +* +* @param[in] pi2_src +* Input data, 16x16 size +* The input is in the form of, first 4 locations will contain DC coeffs of +* U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane +* in raster scan order will follow, each block as linear array in raster scan order. +* After a stride next AC block will follow. After all AC blocks of U plane +* V plane AC blocks will follow in exact same order. 
+* +* @param[in] pu1_pred +* The predicted data, 8x16 size, U and V interleaved +* +* @param[out] pu1_out +* Output 8x16, U and V interleaved +* +* @param[in] src_strd +* Source stride +* +* @param[in] pred_strd +* input stride for prediction buffer +* +* @param[in] out_strd +* input stride for output buffer +* +* @param[in] pu2_iscale_mat +* Inverse quantization matrix for 4x4 transform +* +* @param[in] pu2_weigh_mat +* weight matrix of 4x4 transform +* +* @param[in] qp_div +* QP/6 +* +* @param[in] pi4_tmp +* Input temporary buffer +* needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of Dc coeffs for chroma * number of planes +* in size +* +* @param[in] u4_cntrl +* Controls the transform path +* the 15th bit will correspond to DC block of U plane, 14th will indicate the V plane Dc block +* 32-29 bits will indicate AC blocks of U plane in raster scan order +* 28-25 bits will indicate AC blocks of V plane in raster scan order +* The bit 1 implies that there is at least one non zero coeff in a block +* +* @returns +* none +* +* @remarks +******************************************************************************* +*/ +void ih264e_chroma_8x8_idctrans_iquant_itrans_recon(codec_t *ps_codec, + WORD16 *pi2_src, + UWORD8 *pu1_pred, + UWORD8 *pu1_out, + WORD32 src_strd, + WORD32 pred_strd, + WORD32 out_strd, + const UWORD16 *pu2_iscale_mat, + const UWORD16 *pu2_weigh_mat, + UWORD32 qp_div, + UWORD32 u4_cntrl, + WORD32 *pi4_tmp) +{ + /* Cntrl bits for 4x4 transforms + * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path + * u4_dc_cntrl : controls if a 4x4 block is to be processed in dc path + * : dc block must contain only single dc coefficient + * u4_empty_blk_cntrl : control for 4x4 block with no coeffs, ie no dc and ac + * : ie not (ac or dc) + */ + + UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl; + + /* tmp registers for block ids */ + WORD32 u4_blk_id; + + /* Offsets for pointers */ + WORD32 i4_offset_x, i4_offset_y; + + /* Pointer to 4x4 
blocks */ + UWORD8 *pu1_cur_4x4_prd_blk, *pu1_cur_4x4_out_blk; + + /* Tmp register for pointer to dc coffs */ + WORD16 *pi2_dc_src; + + WORD16 i2_zero = 0; + + /* Increment for dc block */ + WORD32 i4_dc_inc; + + /* + * Lets do the inverse transform for dc coeffs in chroma + */ + if (u4_cntrl & CNTRL_FLAG_DCBLK_MASK_CHROMA) + { + UWORD32 cntr, u4_dc_cntrl; + /* Do inv hadamard for u an v block */ + + ps_codec->pf_ihadamard_scaling_2x2_uv(pi2_src, pi2_src, pu2_iscale_mat, + pu2_weigh_mat, qp_div, NULL); + /* + * Update the cntrl flag + * Flag is updated as follows bits 15-11 -> u block dc bits + */ + u4_dc_cntrl = 0; + for (cntr = 0; cntr < 8; cntr++) + { + u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr)); + } + + /* Mark dc bits as 1 if corresponding ac bit is 0 */ + u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl); + /* Combine both ac and dc bits */ + u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA) + | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_CHROMA); + + /* Since we populated the dc coffs, we have to read them from there */ + pi2_dc_src = pi2_src; + i4_dc_inc = 1; + } + else + { + u4_cntrl = u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA; + pi2_dc_src = &i2_zero; + i4_dc_inc = 0; + } + + /* Get the block bits */ + u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA); + u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_CHROMA) << 16; + u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFF000000; + + /* The AC blocks starts from 2nd row */ + pi2_src += src_strd; + + DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id); + while (u4_blk_id < 8) + { + WORD32 dc_src_offset = u4_blk_id * i4_dc_inc; + + IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); + + pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; + pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; + + ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc( + pi2_dc_src + dc_src_offset, pu1_cur_4x4_prd_blk, + pu1_cur_4x4_out_blk, pred_strd, out_strd, NULL, NULL, 0, + NULL, pi2_dc_src 
+ dc_src_offset); + /* Get next DC block to process */ + DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id); + } + + /* now process ac/mixed blocks */ + DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id); + while (u4_blk_id < 8) + { + WORD32 i4_src_offset = src_strd * u4_blk_id; + WORD32 dc_src_offset = i4_dc_inc * u4_blk_id; + + IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); + + pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; + pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; + + ps_codec->pf_iquant_itrans_recon_chroma_4x4(pi2_src + i4_src_offset, + pu1_cur_4x4_prd_blk, + pu1_cur_4x4_out_blk, + pred_strd, out_strd, + pu2_iscale_mat, + pu2_weigh_mat, qp_div, + (WORD16 *) pi4_tmp, + pi2_dc_src + dc_src_offset); + + DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id); + } + + /* Now process empty blocks */ + DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id); + while (u4_blk_id < 8) + { + IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); + + pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; + pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; + + ps_codec->pf_interleave_copy(pu1_cur_4x4_prd_blk, pu1_cur_4x4_out_blk, + pred_strd, out_strd, SIZE_4X4_BLK_VERT, + SIZE_4X4_BLK_HRZ); + + DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id); + } +} + +/** +****************************************************************************** +* +* @brief This function packs residue of an i16x16 luma mb for entropy coding +* +* @par Description +* An i16 macro block contains two classes of units, dc 4x4 block and +* 4x4 ac blocks. while packing the mb, the dc block is sent first, and +* the 16 ac blocks are sent next in scan order. Each and every block is +* represented by 3 parameters (nnz, significant coefficient map and the +* residue coefficients itself). If a 4x4 unit does not have any coefficients +* then only nnz is sent. 
Inside a 4x4 block the individual coefficients are +* sent in scan order. +* +* The first byte of each block will be nnz of the block, if it is non zero, +* a 2 byte significance map is sent. This is followed by nonzero coefficients. +* This is repeated for 1 dc + 16 ac blocks. +* +* @param[in] pi2_res_mb +* pointer to residue mb +* +* @param[in, out] pv_mb_coeff_data +* buffer pointing to packed residue coefficients +* +* @param[in] i4_res_strd +* residual block stride +* +* @param[in, out] u1_cbp_l +* coded block pattern luma +* set to 15 when any 4x4 block has non zero ac coefficients, otherwise left +* unchanged; it is read back at the end, so the caller must initialise it +* +* @param[in] pu1_nnz +* number of non zero coefficients in each 4x4 unit +* +* @param[out] pu4_cntrl +* Control signal for inverse transform of 16x16 blocks +* +* @return none +* +* @remarks +* +****************************************************************************** +*/ +void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb, + void **pv_mb_coeff_data, + WORD32 i4_res_strd, + UWORD8 *u1_cbp_l, + UWORD8 *pu1_nnz, + UWORD32 *pu4_cntrl) +{ + /* pointer to packed sub block buffer space */ + tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data), *ps_mb_coeff_data_ac; + + /* no of non zero coefficients in the current sub block */ + UWORD32 u4_nnz_cnt; + + /* significant coefficient map */ + UWORD32 u4_s_map; + + /* pointer to scanning matrix */ + const UWORD8 *pu1_scan_order; + + /* number of non zeros in sub block */ + UWORD32 u4_nnz; + + /* coeff scan order */ + const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}; + + /* temp var */ + UWORD32 coeff_cnt, mask, b4,u4_cntrl=0; + + /*DC and AC coeff pointers*/ + WORD16 *pi2_res_mb_ac,*pi2_res_mb_dc; + + /********************************************************/ + /* pack dc coeff data for entropy coding */ + /********************************************************/ + + pi2_res_mb_dc = pi2_res_mb; + pu1_scan_order = gu1_luma_scan_order_dc; + + u4_nnz = *pu1_nnz; + u4_cntrl = 0; + + /* write number of non zero coefficients */ + ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; 
+ + if (u4_nnz) + { + for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) + { + if (pi2_res_mb_dc[pu1_scan_order[coeff_cnt]]) + { + /* write residue */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_dc[pu1_scan_order[coeff_cnt]]; + u4_s_map |= mask; + } + mask <<= 1; + } + /* write significant coeff map */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt; + + u4_cntrl = 0x00008000;// Set DC bit in ctrl code + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + + /********************************************************/ + /* pack ac coeff data for entropy coding */ + /********************************************************/ + + pu1_nnz ++; + pu1_scan_order = gu1_luma_scan_order; + pi2_res_mb += i4_res_strd; /*Move to AC block*/ + + ps_mb_coeff_data_ac = (*pv_mb_coeff_data); + + for (b4 = 0; b4 < 16; b4++) + { + ps_mb_coeff_data = (*pv_mb_coeff_data); + + u4_nnz = pu1_nnz[u1_scan_order[b4]]; + + /* Jump according to the scan order */ + pi2_res_mb_ac = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]); + + /* + * Since this is a i16x16 block, we should not count dc coeff on indi + * vidual 4x4 blocks to nnz. But due to the implementation of 16x16 + * trans function, we add dc's nnz to u4_nnz too. 
Hence we adjust that + * here + */ + u4_nnz -= (pi2_res_mb_ac[0] != 0); + + /* write number of non zero coefficients */ + ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; + + if (u4_nnz) + { + for (u4_nnz_cnt = 0, coeff_cnt = 1, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) + { + if (pi2_res_mb_ac[pu1_scan_order[coeff_cnt]]) + { + /* write residue */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_ac[pu1_scan_order[coeff_cnt]]; + u4_s_map |= mask; + } + mask <<= 1; + } + /* write significant coeff map */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt; + *u1_cbp_l = 15; + + u4_cntrl |= (1 << (31 - u1_scan_order[b4])); + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + + } + + if (!(*u1_cbp_l)) + { + (*pv_mb_coeff_data) = ps_mb_coeff_data_ac; + } + + /* Store the cntrl signal */ + (*pu4_cntrl) = u4_cntrl; + return; +} + +/** +****************************************************************************** +* +* @brief This function packs residue of an p16x16 luma mb for entropy coding +* +* @par Description +* A p16x16 macro block contains two classes of units 16 4x4 ac blocks. +* while packing the mb, the dc block is sent first, and +* the 16 ac blocks are sent next in scan order. Each and every block is +* represented by 3 parameters (nnz, significant coefficient map and the +* residue coefficients itself). If a 4x4 unit does not have any coefficients +* then only nnz is sent. Inside a 4x4 block the individual coefficients are +* sent in scan order. +* +* The first byte of each block will be nnz of the block, if it is non zero, +* a 2 byte significance map is sent. This is followed by nonzero coefficients. +* This is repeated for 1 dc + 16 ac blocks. 
+* +* @param[in] pi2_res_mb +* pointer to residue mb +* +* @param[in, out] pv_mb_coeff_data +* buffer pointing to packed residue coefficients +* +* @param[in] i4_res_strd +* residual block stride +* +* @param[out] u1_cbp_l +* coded block pattern luma +* +* @param[in] pu1_nnz +* number of non zero coefficients in each 4x4 unit +* +* @param[out] pu4_cntrl +* Control signal for inverse transform +* +* @return none +* +* @remarks Killing coffs not yet coded +* +****************************************************************************** +*/ +void ih264e_pack_l_mb(WORD16 *pi2_res_mb, + void **pv_mb_coeff_data, + WORD32 i4_res_strd, + UWORD8 *u1_cbp_l, + UWORD8 *pu1_nnz, + UWORD32 u4_thres_resi, + UWORD32 *pu4_cntrl) +{ + /* pointer to packed sub block buffer space */ + tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8, *ps_mb_coeff_data_mb; + + /* no of non zero coefficients in the current sub block */ + UWORD32 u4_nnz_cnt; + + /* significant coefficient map */ + UWORD32 u4_s_map; + + /* pointer to scanning matrix */ + const UWORD8 *pu1_scan_order = gu1_luma_scan_order; + + /* number of non zeros in sub block */ + UWORD32 u4_nnz; + + /* pointer to residual sub block */ + WORD16 *pi2_res_sb; + + /* coeff scan order */ + const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}; + + /* coeff cost */ + const UWORD8 *pu1_coeff_cost = gu1_coeff_cost; + + /* temp var */ + UWORD32 u4_mb_coeff_cost = 0, u4_b8_coeff_cost = 0, coeff_cnt, mask, u4_cntrl = 0, b4, b8; + + /* temp var */ + WORD32 i4_res_val, i4_run = -1, dcac_block; + + /* When Hadamard transform is disabled, first row values are dont care, ignore them */ + pi2_res_mb += i4_res_strd; + + /* When Hadamard transform is disabled, first unit value is dont care, ignore this */ + pu1_nnz ++; + + ps_mb_coeff_data_mb = ps_mb_coeff_data_b8 = (*pv_mb_coeff_data); + + /********************************************************/ + /* pack coeff data for entropy coding */ + 
/********************************************************/ + + for (b4 = 0; b4 < 16; b4++) + { + ps_mb_coeff_data = (*pv_mb_coeff_data); + + b8 = b4 >> 2; + + u4_nnz = pu1_nnz[u1_scan_order[b4]]; + + /* Jump according to the scan order */ + pi2_res_sb = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]); + + /* write number of non zero coefficients */ + ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; + + if (u4_nnz) + { + for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) + { + /* number of runs of zero before, this is used to compute coeff cost */ + i4_run++; + + i4_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]]; + + if (i4_res_val) + { + /* write residue */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i4_res_val; + u4_s_map |= mask; + + if (u4_thres_resi) + { + /* compute coeff cost */ + if (i4_res_val == 1 || i4_res_val == -1) + { + if (i4_run < 6) + u4_b8_coeff_cost += pu1_coeff_cost[i4_run]; + } + else + u4_b8_coeff_cost += 9; + + i4_run = -1; + } + } + + mask <<= 1; + } + + /* write significant coeff map */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt; + + /* cbp */ + *u1_cbp_l |= (1 << b8); + + /* Cntrl map for inverse transform computation + * + * If coeff_cnt is zero, it means that only nonzero was a dc coeff + * Hence we have to set the 16 - u1_scan_order[b4]) position instead + * of 31 - u1_scan_order[b4] + */ + dcac_block = (coeff_cnt == 0)?16:31; + u4_cntrl |= (1 << (dcac_block - u1_scan_order[b4])); + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + + /* Decide if the 8x8 unit has to be sent for entropy coding? 
*/ + if ((b4+1) % 4 == 0) + { + if ( u4_thres_resi && (u4_b8_coeff_cost <= LUMA_SUB_BLOCK_SKIP_THRESHOLD) && + (*u1_cbp_l & (1 << b8)) ) + { + + + /* + * When we want to reset the full 8x8 block, we have to reset + * both the dc and ac coeff bits hence we have the symmetric + * arrangement of bits + */ + const UWORD32 cntrl_mask_map[4] = {0xcc00cc00, 0x33003300, 0x00cc00cc, 0x00330033}; + + /* restore cbp */ + *u1_cbp_l = (*u1_cbp_l & (~(1 << b8))); + + /* correct cntrl flag */ + u4_cntrl = u4_cntrl & (~cntrl_mask_map[(b4 >> 2)]); + + /* correct nnz */ + pu1_nnz[u1_scan_order[b4 - 3]] = 0; + pu1_nnz[u1_scan_order[b4 - 2]] = 0; + pu1_nnz[u1_scan_order[b4 - 1]] = 0; + pu1_nnz[u1_scan_order[b4]] = 0; + + /* reset blk cost */ + u4_b8_coeff_cost = 0; + } + + if (!(*u1_cbp_l & (1 << b8))) + { + (*pv_mb_coeff_data) = ps_mb_coeff_data_b8; + } + + u4_mb_coeff_cost += u4_b8_coeff_cost; + + u4_b8_coeff_cost = 0; + i4_run = -1; + ps_mb_coeff_data_b8 = (*pv_mb_coeff_data); + } + } + + if (u4_thres_resi && (u4_mb_coeff_cost <= LUMA_BLOCK_SKIP_THRESHOLD) + && (*u1_cbp_l)) + { + (*pv_mb_coeff_data) = ps_mb_coeff_data_mb; + *u1_cbp_l = 0; + u4_cntrl = 0; + memset(pu1_nnz, 0, 16); + } + + (*pu4_cntrl) = u4_cntrl; + + return; +} + +/** +****************************************************************************** +* +* @brief This function packs residue of an i8x8 chroma mb for entropy coding +* +* @par Description +* An i8 chroma macro block contains two classes of units, dc 2x2 block and +* 4x4 ac blocks. while packing the mb, the dc block is sent first, and +* the 4 ac blocks are sent next in scan order. Each and every block is +* represented by 3 parameters (nnz, significant coefficient map and the +* residue coefficients itself). If a 4x4 unit does not have any coefficients +* then only nnz is sent. Inside a 4x4 block the individual coefficients are +* sent in scan order. 
+* +* The first byte of each block will be nnz of the block, if it is non zero, +* a 2 byte significance map is sent. This is followed by nonzero coefficients. +* This is repeated for 1 dc + 4 ac blocks. +* +* @param[in] pi2_res_mb +* pointer to residue mb +* +* @param[in, out] pv_mb_coeff_data +* buffer pointing to packed residue coefficients +* +* @param[in] u4_res_strd +* residual block stride +* +* @param[out] u1_cbp_c +* coded block pattern chroma +* +* @param[in] pu1_nnz +* number of non zero coefficients in each 4x4 unit +* +* @param[out] pu1_nnz +* Control signal for inverse transform +* +* @param[in] u4_swap_uv +* Swaps the order of U and V planes in entropy bitstream +* +* @return none +* +* @ remarks +* +****************************************************************************** +*/ +void ih264e_pack_c_mb(WORD16 *pi2_res_mb, + void **pv_mb_coeff_data, + WORD32 i4_res_strd, + UWORD8 *u1_cbp_c, + UWORD8 *pu1_nnz, + UWORD32 u4_thres_resi, + UWORD32 *pu4_cntrl, + UWORD32 u4_swap_uv) +{ + /* pointer to packed sub block buffer space */ + tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data); + tu_sblk_coeff_data_t *ps_mb_coeff_data_dc, *ps_mb_coeff_data_ac; + + /* nnz pointer */ + UWORD8 *pu1_nnz_ac, *pu1_nnz_dc; + + /* nnz counter */ + UWORD32 u4_nnz_cnt; + + /* significant coefficient map */ + UWORD32 u4_s_map; + + /* pointer to scanning matrix */ + const UWORD8 *pu1_scan_order; + + /* no of non zero coefficients in the current sub block */ + UWORD32 u4_nnz; + + /* pointer to residual sub block, res val */ + WORD16 *pi2_res_sb, i2_res_val; + + /* temp var */ + UWORD32 coeff_cnt, mask, b4,plane; + + /* temp var */ + UWORD32 u4_coeff_cost; + WORD32 i4_run; + + /* coeff cost */ + const UWORD8 *pu1_coeff_cost = gu1_coeff_cost; + + /* pointer to packed buffer space */ + UWORD32 *pu4_mb_coeff_data = NULL; + + /* ac coded block pattern */ + UWORD8 u1_cbp_ac; + + /* Variable to store the current bit pos in cntrl variable*/ + UWORD32 cntrl_pos = 0; + + 
/********************************************************/ + /* pack dc coeff data for entropy coding */ + /********************************************************/ + pu1_scan_order = gu1_chroma_scan_order_dc; + pi2_res_sb = pi2_res_mb; + pu1_nnz_dc = pu1_nnz; + (*pu4_cntrl) = 0; + cntrl_pos = 15; + ps_mb_coeff_data_dc = (*pv_mb_coeff_data); + + /* Color space conversion between SP_UV and SP_VU + * We always assume SP_UV for all the processing + * Hence to get proper stream output we need to swap U and V channels here + * + * For that there are two paths we need to look for + * One is the path to bitstream , these variables should have the proper input + * configured UV or VU + * For the other path the inverse transform variables should have ehat ever 0ordering the + * input had + */ + + if (u4_swap_uv) + { + pu1_nnz_dc += 5;/* Move to NNZ of V planve */ + pi2_res_sb += 4;/* Move to DC coff of V plane */ + + cntrl_pos = 14; /* Control bit for V plane */ + } + + for (plane = 0; plane < 2; plane++) + { + ps_mb_coeff_data = (*pv_mb_coeff_data); + + u4_nnz = *pu1_nnz_dc; + /* write number of non zero coefficients U/V */ + ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; + + if (u4_nnz) + { + for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) + { + i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]]; + if (i2_res_val) + { + /* write residue U/V */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val; + u4_s_map |= mask; + } + mask <<= 1; + } + /* write significant coeff map U/V */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt; + *u1_cbp_c = 1; + + (*pu4_cntrl) |= (1 << cntrl_pos); + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + + if (u4_swap_uv) + { + cntrl_pos++; /* Control bit for U plane */ + pu1_nnz_dc -= 5; /* Move to NNZ of U plane */ + pi2_res_sb -= 4; /* Move to DC coff of U plane */ + + } + else + { + cntrl_pos--; 
/* Control bit for U plane */ + pu1_nnz_dc += 5; /* 4 for AC NNZ and 1 for DC */ + pi2_res_sb += 4; /* Move to DC coff of V plane */ + } + } + + /********************************************************/ + /* pack ac coeff data for entropy coding */ + /********************************************************/ + + pu1_scan_order = gu1_chroma_scan_order; + ps_mb_coeff_data_ac = (*pv_mb_coeff_data); + + if (u4_swap_uv) + { + pi2_res_sb = pi2_res_mb + i4_res_strd * 5; /* Move to V plane ,ie 1dc row+ 4 ac row */ + cntrl_pos = 27; /* The control bits are to be added for V bloc ie 31-4 th bit */ + pu1_nnz_ac = pu1_nnz + 6;/*Move the nnz to V block NNZ 1 dc + 1dc + 4 ac */ + } + else + { + pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to U plane ,ie 1dc row */ + cntrl_pos = 31; + pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to V block NNZ 1 dc */ + } + + for (plane = 0; plane < 2; plane++) + { + pu4_mb_coeff_data = (*pv_mb_coeff_data); + + u4_coeff_cost = 0; + i4_run = -1; + + /* get the current cbp, so that it automatically + * gets reverted in case of zero ac values */ + u1_cbp_ac = *u1_cbp_c; + + for (b4 = 0; b4 < 4; b4++) + { + ps_mb_coeff_data = (*pv_mb_coeff_data); + + u4_nnz = *pu1_nnz_ac; + + /* + * We are scanning only ac coeffs, but the nnz is for the + * complete 4x4 block. 
Hence we have to discount the nnz contributed + * by the dc coefficient + */ + u4_nnz -= (pi2_res_sb[0]!=0); + + /* write number of non zero coefficients U/V */ + ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; + + if (u4_nnz) + { + for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) + { + i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]]; + + i4_run++; + + if (i2_res_val) + { + /* write residue U/V */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val; + u4_s_map |= mask; + + if ( u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD) ) + { + /* compute coeff cost */ + if (i2_res_val == 1 || i2_res_val == -1) + { + if (i4_run < 6) + u4_coeff_cost += pu1_coeff_cost[i4_run]; + } + else + u4_coeff_cost += 9; + + i4_run = -1; + } + } + mask <<= 1; + } + + /* write significant coeff map U/V */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt; + u1_cbp_ac = 2; + + (*pu4_cntrl) |= 1 << cntrl_pos; + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + + pu1_nnz_ac++; + pi2_res_sb += i4_res_strd; + cntrl_pos--; + } + + /* reset block */ + if (u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD)) + { + pu4_mb_coeff_data[0] = 0; + pu4_mb_coeff_data[1] = 0; + pu4_mb_coeff_data[2] = 0; + pu4_mb_coeff_data[3] = 0; + (*pv_mb_coeff_data) = pu4_mb_coeff_data + 4; + + /* Generate the control signal */ + /* Zero out the current plane's AC coefficients */ + (*pu4_cntrl) &= ((plane == u4_swap_uv) ? 
0x0FFFFFFF : 0xF0FFFFFF); + + /* Similarly do for the NNZ also */ + *(pu1_nnz_ac - 4) = 0; + *(pu1_nnz_ac - 3) = 0; + *(pu1_nnz_ac - 2) = 0; + *(pu1_nnz_ac - 1) = 0; + } + else + { + *u1_cbp_c = u1_cbp_ac; + } + + if (u4_swap_uv) + { + pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to V plane ,ie 1dc row+ 4 ac row + 1 dc row */ + cntrl_pos = 31; /* The control bits are to be added for V bloc ie 31-4 th bit */ + pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to V block NNZ 1 dc + 1dc + 4 ac */ + + pu1_nnz_ac = pu1_nnz + 1; + } + else + pu1_nnz_ac = pu1_nnz + 6; /* Go to nnz of V plane */ + } + + /* restore the ptr basing on cbp */ + if (*u1_cbp_c == 0) + { + (*pv_mb_coeff_data) = ps_mb_coeff_data_dc; + } + else if (*u1_cbp_c == 1) + { + (*pv_mb_coeff_data) = ps_mb_coeff_data_ac; + } + + return ; +} + +/** +******************************************************************************* +* +* @brief performs luma core coding when intra mode is i16x16 +* +* @par Description: +* If the current mb is to be coded as intra of mb type i16x16, the mb is first +* predicted using one of i16x16 prediction filters, basing on the intra mode +* chosen. Then, error is computed between the input blk and the estimated blk. +* This error is transformed (hierarchical transform i.e., dct followed by hada- +* -mard), quantized. The quantized coefficients are packed in scan order for +* entropy coding. 
+* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_l +* coded block pattern luma +* +* @remarks none +* +******************************************************************************* +*/ + +UWORD8 ih264e_code_luma_intra_macroblock_16x16(process_ctxt_t *ps_proc) +{ + /* Codec Context */ + codec_t *ps_codec = ps_proc->ps_codec; + + /* pointer to ref macro block */ + UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma; + + /* pointer to src macro block */ + UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma; + + /* pointer to prediction macro block */ + UWORD8 *pu1_pred_mb = NULL; + + /* pointer to residual macro block */ + WORD16 *pi2_res_mb = ps_proc->pi2_res_buf; + + /* strides */ + WORD32 i4_src_strd = ps_proc->i4_src_strd; + WORD32 i4_rec_strd = ps_proc->i4_rec_strd; + WORD32 i4_pred_strd = ps_proc->i4_pred_strd; + WORD32 i4_res_strd = ps_proc->i4_res_strd; + + /* intra mode */ + UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode; + + /* coded block pattern */ + UWORD8 u1_cbp_l = 0; + + /* number of non zero coeffs*/ + UWORD32 au4_nnz[5]; + UWORD8 *pu1_nnz = (UWORD8 *)au4_nnz; + + /*Cntrol signal for itrans*/ + UWORD32 u4_cntrl; + + /* quantization parameters */ + quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; + + /* pointer to packed mb coeff data */ + void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); + + /* init nnz */ + au4_nnz[0] = 0; + au4_nnz[1] = 0; + au4_nnz[2] = 0; + au4_nnz[3] = 0; + au4_nnz[4] = 0; + + if (u1_intra_mode == PLANE_I16x16) + { + pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16_plane; + } + else + { + pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16; + } + + /********************************************************/ + /* error estimation, */ + /* transform */ + /* quantization */ + /********************************************************/ + ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb, + pu1_pred_mb, pi2_res_mb, + i4_src_strd, i4_pred_strd, + i4_res_strd, + 
ps_qp_params->pu2_scale_mat, + ps_qp_params->pu2_thres_mat, + ps_qp_params->u1_qbits, + ps_qp_params->u4_dead_zone, + pu1_nnz, ENABLE_DC_TRANSFORM); + + /********************************************************/ + /* pack coeff data for entropy coding */ + /********************************************************/ + ih264e_pack_l_mb_i16(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l, + pu1_nnz, &u4_cntrl); + + /********************************************************/ + /* ierror estimation, */ + /* itransform */ + /* iquantization */ + /********************************************************/ + /* + *if refernce frame is not to be computed + *we only need the right and bottom border 4x4 blocks to predict next intra + *blocks, hence only compute them + */ + if (!ps_proc->u4_compute_recon) + { + u4_cntrl &= 0x111F8000; + } + + if (u4_cntrl) + { + ih264e_luma_16x16_idctrans_iquant_itrans_recon( + ps_codec, pi2_res_mb, pu1_pred_mb, pu1_ref_mb, + i4_res_strd, i4_pred_strd, i4_rec_strd, + ps_qp_params->pu2_iscale_mat, + ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div, + u4_cntrl, ENABLE_DC_TRANSFORM, + ps_proc->pv_scratch_buff); + } + else + { + ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb, i4_pred_strd, + i4_rec_strd, MB_SIZE, MB_SIZE, NULL, + 0); + } + + return (u1_cbp_l); +} + + +/** +******************************************************************************* +* +* @brief performs luma core coding when intra mode is i4x4 +* +* @par Description: +* If the current mb is to be coded as intra of mb type i4x4, the mb is first +* predicted using one of i4x4 prediction filters, basing on the intra mode +* chosen. Then, error is computed between the input blk and the estimated blk. +* This error is dct transformed and quantized. The quantized coefficients are +* packed in scan order for entropy coding. 
+* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_l +* coded block pattern luma +* +* @remarks +* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order +* mentioned in h.264 specification +* +******************************************************************************* +*/ +UWORD8 ih264e_code_luma_intra_macroblock_4x4(process_ctxt_t *ps_proc) +{ + /* Codec Context */ + codec_t *ps_codec = ps_proc->ps_codec; + + /* pointer to ref macro block */ + UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma; + + /* pointer to src macro block */ + UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma; + + /* pointer to prediction macro block */ + UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; + + /* pointer to residual macro block */ + WORD16 *pi2_res_mb = ps_proc->pi2_res_buf; + + /* strides */ + WORD32 i4_src_strd = ps_proc->i4_src_strd; + WORD32 i4_rec_strd = ps_proc->i4_rec_strd; + WORD32 i4_pred_strd = ps_proc->i4_pred_strd; + + /* pointer to neighbors: left, top, top-left */ + UWORD8 *pu1_mb_a; + UWORD8 *pu1_mb_b; + UWORD8 *pu1_mb_c; + UWORD8 *pu1_mb_d; + + /* intra mode */ + UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode; + + /* neighbor availability */ + WORD32 i4_ngbr_avbl; + + /* neighbor pels for intra prediction */ + UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels; + + /* coded block pattern */ + UWORD8 u1_cbp_l = 0; + + /* number of non zero coeffs*/ + UWORD8 u1_nnz; + + /* quantization parameters */ + quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; + + /* pointer to packed mb coeff data */ + void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); + + /* pointer to packed mb coeff data */ + tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8; + + /* no of non zero coefficients in the current sub block */ + UWORD32 u4_nnz_cnt; + + /* significant coefficient map */ + UWORD32 u4_s_map; + + /* pointer to scanning matrix */ + const UWORD8 *pu1_scan_order = gu1_luma_scan_order; + + /*Dummy 
variable for 4x4 trans fucntion*/ + WORD16 i2_dc_dummy; + + /* temp var */ + UWORD32 i, b8, b4, u1_blk_x, u1_blk_y, u1_pix_x, u1_pix_y, coeff_cnt, mask; + + /* Process 16 4x4 lum sub-blocks of the MB in scan order */ + for (b8 = 0; b8 < 4; b8++) + { + u1_blk_x = GET_BLK_RASTER_POS_X(b8) << 3; + u1_blk_y = GET_BLK_RASTER_POS_Y(b8) << 3; + + /* if in case cbp for the 8x8 block is zero, send no residue */ + ps_mb_coeff_data_b8 = *pv_mb_coeff_data; + + for (b4 = 0; b4 < 4; b4++) + { + /* index of pel in MB */ + u1_pix_x = u1_blk_x + (GET_SUB_BLK_RASTER_POS_X(b4) << 2); + u1_pix_y = u1_blk_y + (GET_SUB_BLK_RASTER_POS_Y(b4) << 2); + + /* Initialize source and reference pointers */ + pu1_curr_mb = ps_proc->pu1_src_buf_luma + u1_pix_x + (u1_pix_y * i4_src_strd); + pu1_ref_mb = ps_proc->pu1_rec_buf_luma + u1_pix_x + (u1_pix_y * i4_rec_strd); + + /* pointer to left of ref macro block */ + pu1_mb_a = pu1_ref_mb - 1; + /* pointer to top of ref macro block */ + pu1_mb_b = pu1_ref_mb - i4_rec_strd; + /* pointer to topright of ref macro block */ + pu1_mb_c = pu1_mb_b + 4; + /* pointer to topleft macro block */ + pu1_mb_d = pu1_mb_b - 1; + + /* compute neighbor availability */ + i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4]; + + /* sub block intra mode */ + u1_intra_mode = ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4]; + + /********************************************************/ + /* gather prediction pels from neighbors for prediction */ + /********************************************************/ + /* left pels */ + if (i4_ngbr_avbl & LEFT_MB_AVAILABLE_MASK) + { + for (i = 0; i < 4; i++) + pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_rec_strd]; + } + else + { + memset(pu1_ngbr_pels_i4, 0, 4); + } + + /* top pels */ + if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK) + { + memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4); + } + else + { + memset(pu1_ngbr_pels_i4 + 5, 0, 4); + } + /* top left pels */ + if (i4_ngbr_avbl & TOP_LEFT_MB_AVAILABLE_MASK) + { + 
pu1_ngbr_pels_i4[4] = *pu1_mb_d; + } + else + { + pu1_ngbr_pels_i4[4] = 0; + } + /* top right pels */ + if (i4_ngbr_avbl & TOP_RIGHT_MB_AVAILABLE_MASK) + { + memcpy(pu1_ngbr_pels_i4+8+1,pu1_mb_c,4); + } + else if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK) + { + memset(pu1_ngbr_pels_i4+8+1,pu1_ngbr_pels_i4[8],4); + } + + /********************************************************/ + /* prediction */ + /********************************************************/ + (ps_codec->apf_intra_pred_4_l)[u1_intra_mode](pu1_ngbr_pels_i4, + pu1_pred_mb, 0, + i4_pred_strd, + i4_ngbr_avbl); + + /********************************************************/ + /* error estimation, */ + /* transform */ + /* quantization */ + /********************************************************/ + ps_codec->pf_resi_trans_quant_4x4(pu1_curr_mb, pu1_pred_mb, + pi2_res_mb, i4_src_strd, + i4_pred_strd, + ps_qp_params->pu2_scale_mat, + ps_qp_params->pu2_thres_mat, + ps_qp_params->u1_qbits, + ps_qp_params->u4_dead_zone, + &u1_nnz, &i2_dc_dummy); + + /********************************************************/ + /* pack coeff data for entropy coding */ + /********************************************************/ + ps_mb_coeff_data = *pv_mb_coeff_data; + + /* write number of non zero coefficients */ + ps_mb_coeff_data->i4_sig_map_nnz = u1_nnz; + + if (u1_nnz) + { + for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u1_nnz; coeff_cnt++) + { + if (pi2_res_mb[pu1_scan_order[coeff_cnt]]) + { + /* write residue */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]]; + u4_s_map |= mask; + } + mask <<= 1; + } + /* write significant coeff map */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + + /* update ptr to coeff data */ + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt; + + /* cbp */ + u1_cbp_l |= (1 << b8); + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + + 
/********************************************************/ + /* ierror estimation, */ + /* itransform */ + /* iquantization */ + /********************************************************/ + /* If the frame is not to be used for P frame reference or dumping recon + * we only will use the recon for only predicting intra Mbs + * This will need only right and bottom edge 4x4 blocks recon + * Hence we selectively enable them + */ + if (ps_proc->u4_compute_recon || (0xF888 & (1 << ((b8 << 2) + b4)))) + { + if (u1_nnz) + ps_codec->pf_iquant_itrans_recon_4x4( + pi2_res_mb, pu1_pred_mb, pu1_ref_mb, + /*No input stride,*/i4_pred_strd, + i4_rec_strd, ps_qp_params->pu2_iscale_mat, + ps_qp_params->pu2_weigh_mat, + ps_qp_params->u1_qp_div, + ps_proc->pv_scratch_buff, 0, 0); + else + ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb, + i4_pred_strd, i4_rec_strd, + BLK_SIZE, BLK_SIZE, NULL, + 0); + } + + } + + /* if the 8x8 block has no residue, nothing needs to be sent to entropy */ + if (!(u1_cbp_l & (1 << b8))) + { + *pv_mb_coeff_data = ps_mb_coeff_data_b8; + } + } + + return (u1_cbp_l); +} + +/** +******************************************************************************* +* +* @brief performs luma core coding when intra mode is i4x4 +* +* @par Description: +* If the current mb is to be coded as intra of mb type i4x4, the mb is first +* predicted using one of i4x4 prediction filters, basing on the intra mode +* chosen. Then, error is computed between the input blk and the estimated blk. +* This error is dct transformed and quantized. The quantized coefficients are +* packed in scan order for entropy coding. 
+* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_l +* coded block pattern luma +* +* @remarks +* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order +* mentioned in h.264 specification +* +******************************************************************************* +*/ +UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on(process_ctxt_t *ps_proc) +{ + /* Codec Context */ + codec_t *ps_codec = ps_proc->ps_codec; + + /* pointer to ref macro block */ + UWORD8 *pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4; + + /* pointer to recon buffer */ + UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma; + + /* pointer to residual macro block */ + WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4; + + /* strides */ + WORD32 i4_rec_strd = ps_proc->i4_rec_strd; + + /* number of non zero coeffs*/ + UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4; + + /* coded block pattern */ + UWORD8 u1_cbp_l = 0; + + /* pointer to packed mb coeff data */ + void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); + + /* pointer to packed mb coeff data */ + tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8; + + /* no of non zero coefficients in the current sub block */ + UWORD32 u4_nnz_cnt; + + /* significant coefficient map */ + UWORD32 u4_s_map; + + /* pointer to scanning matrix */ + const UWORD8 *pu1_scan_order = gu1_luma_scan_order; + + /* temp var */ + UWORD32 b8, b4, coeff_cnt, mask; + + /* Process 16 4x4 lum sub-blocks of the MB in scan order */ + for (b8 = 0; b8 < 4; b8++) + { + /* if in case cbp for the 8x8 block is zero, send no residue */ + ps_mb_coeff_data_b8 = *pv_mb_coeff_data; + + for (b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE) + { + /********************************************************/ + /* pack coeff data for entropy coding */ + /********************************************************/ + ps_mb_coeff_data = *pv_mb_coeff_data; + + /* write number of non zero 
coefficients */ + ps_mb_coeff_data->i4_sig_map_nnz = *pu1_nnz; + + if (*pu1_nnz) + { + for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < *pu1_nnz; coeff_cnt++) + { + if (pi2_res_mb[pu1_scan_order[coeff_cnt]]) + { + /* write residue */ + ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]]; + u4_s_map |= mask; + } + mask <<= 1; + } + /* write significant coeff map */ + ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); + + /* update ptr to coeff data */ + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt; + + /* cbp */ + u1_cbp_l |= (1 << b8); + } + else + { + (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; + } + } + + /* if the 8x8 block has no residue, nothing needs to be sent to entropy */ + if (!(u1_cbp_l & (1 << b8))) + { + *pv_mb_coeff_data = ps_mb_coeff_data_b8; + } + } + + /* memcpy recon */ + ps_codec->pf_inter_pred_luma_copy(pu1_ref_mb_intra_4x4, pu1_rec_mb, MB_SIZE, i4_rec_strd, MB_SIZE, MB_SIZE, NULL, 0); + + return (u1_cbp_l); +} + + +/** +******************************************************************************* +* +* @brief performs chroma core coding for intra macro blocks +* +* @par Description: +* If the current MB is to be intra coded with mb type chroma I8x8, the MB is +* first predicted using intra 8x8 prediction filters. The predicted data is +* compared with the input for error and the error is transformed. The DC +* coefficients of each transformed sub blocks are further transformed using +* Hadamard transform. The resulting coefficients are quantized, packed and sent +* for entropy coding. 
+* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_c +* coded block pattern chroma +* +* @remarks +* The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order +* mentioned in h.264 specification +* +******************************************************************************* +*/ +UWORD8 ih264e_code_chroma_intra_macroblock_8x8(process_ctxt_t *ps_proc) +{ + /* Codec Context */ + codec_t *ps_codec = ps_proc->ps_codec; + + /* pointer to ref macro block */ + UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma; + + /* pointer to src macro block */ + UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma; + + /* pointer to prediction macro block */ + UWORD8 *pu1_pred_mb = NULL; + + /* pointer to residual macro block */ + WORD16 *pi2_res_mb = ps_proc->pi2_res_buf; + + /* strides */ + WORD32 i4_src_strd = ps_proc->i4_src_strd; + WORD32 i4_rec_strd = ps_proc->i4_rec_strd; + WORD32 i4_pred_strd = ps_proc->i4_pred_strd; + WORD32 i4_res_strd = ps_proc->i4_res_strd; + + /* intra mode */ + UWORD8 u1_intra_mode = ps_proc->u1_c_i8_mode; + + /* coded block pattern */ + UWORD8 u1_cbp_c = 0; + + /* number of non zero coeffs*/ + UWORD8 au1_nnz[18] = {0}; + + /* quantization parameters */ + quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1]; + + /* Control signal for inverse transform */ + UWORD32 u4_cntrl; + + /* pointer to packed mb coeff data */ + void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); + + /* See if we need to swap U and V plances for entropy */ + UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU; + + if (PLANE_CH_I8x8 == u1_intra_mode) + { + pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma_plane; + } + else + { + pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma; + } + + /********************************************************/ + /* error estimation, */ + /* transform */ + /* quantization */ + /********************************************************/ + 
ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb, + pu1_pred_mb, pi2_res_mb, + i4_src_strd, i4_pred_strd, + i4_res_strd, + ps_qp_params->pu2_scale_mat, + ps_qp_params->pu2_thres_mat, + ps_qp_params->u1_qbits, + ps_qp_params->u4_dead_zone, + au1_nnz); + + /********************************************************/ + /* pack coeff data for entropy coding */ + /********************************************************/ + ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c, + au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv); + + /********************************************************/ + /* ierror estimation, */ + /* itransform */ + /* iquantization */ + /********************************************************/ + ih264e_chroma_8x8_idctrans_iquant_itrans_recon(ps_codec, pi2_res_mb, + pu1_pred_mb, pu1_ref_mb, + i4_res_strd, i4_pred_strd, + i4_rec_strd, + ps_qp_params->pu2_iscale_mat, + ps_qp_params->pu2_weigh_mat, + ps_qp_params->u1_qp_div, + u4_cntrl, + ps_proc->pv_scratch_buff); + return (u1_cbp_c); +} + + +/** +******************************************************************************* +* +* @brief performs luma core coding when mode is inter +* +* @par Description: +* If the current mb is to be coded as inter the mb is predicted based on the +* sub mb partitions and corresponding motion vectors generated by ME. Then, +* error is computed between the input blk and the estimated blk. This error is +* transformed, quantized. 
The quantized coefficients are packed in scan order +* for entropy coding +* +* @param[in] ps_proc_ctxt +* pointer to the current macro block context +* +* @returns u1_cbp_l +* coded block pattern luma +* +* @remarks none +* +******************************************************************************* +*/ + +UWORD8 ih264e_code_luma_inter_macroblock_16x16(process_ctxt_t *ps_proc) +{ + /* Codec Context */ + codec_t *ps_codec = ps_proc->ps_codec; + + /* pointer to ref macro block */ + UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma; + + /* pointer to src macro block */ + UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma; + + /* pointer to prediction macro block */ + UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; + + /* pointer to residual macro block */ + WORD16 *pi2_res_mb = ps_proc->pi2_res_buf; + + /* strides */ + WORD32 i4_src_strd = ps_proc->i4_src_strd; + WORD32 i4_rec_strd = ps_proc->i4_rec_strd; + WORD32 i4_pred_strd = ps_proc->i4_pred_strd; + WORD32 i4_res_strd = ps_proc->i4_res_strd; + + /* coded block pattern */ + UWORD8 u1_cbp_l = 0; + + /*Control signal of itrans*/ + UWORD32 u4_cntrl; + + /* number of non zero coeffs*/ + UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz; + + /* quantization parameters */ + quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; + + /* pointer to packed mb coeff data */ + void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); + + /* pseudo pred buffer */ + UWORD8 *pu1_pseudo_pred = pu1_pred_mb; + + /* pseudo pred buffer stride */ + WORD32 i4_pseudo_pred_strd = i4_pred_strd; + + /* init nnz */ + ps_proc->au4_nnz[0] = 0; + ps_proc->au4_nnz[1] = 0; + ps_proc->au4_nnz[2] = 0; + ps_proc->au4_nnz[3] = 0; + ps_proc->au4_nnz[4] = 0; + + /********************************************************/ + /* prediction */ + /********************************************************/ + ih264e_motion_comp_luma(ps_proc, &pu1_pseudo_pred, &i4_pseudo_pred_strd); + + /********************************************************/ + /* error estimation, */ + 
/* transform */ + /* quantization */ + /********************************************************/ + if (ps_proc->u4_min_sad_reached == 0 || ps_proc->u4_min_sad != 0) + { + ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb, + pu1_pseudo_pred, pi2_res_mb, + i4_src_strd, + i4_pseudo_pred_strd, + i4_res_strd, + ps_qp_params->pu2_scale_mat, + ps_qp_params->pu2_thres_mat, + ps_qp_params->u1_qbits, + ps_qp_params->u4_dead_zone, + pu1_nnz, + DISABLE_DC_TRANSFORM); + + /********************************************************/ + /* pack coeff data for entropy coding */ + /********************************************************/ + ih264e_pack_l_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l, + pu1_nnz, ps_codec->u4_thres_resi, &u4_cntrl); + } + else + { + u1_cbp_l = 0; + u4_cntrl = 0; + } + + /********************************************************/ + /* ierror estimation, */ + /* itransform */ + /* iquantization */ + /********************************************************/ + + /*If the frame is not to be used for P frame reference or dumping recon + * we only will use the reocn for only predicting intra Mbs + * THis will need only right and bottom edge 4x4 blocks recon + * Hence we selectively enable them using control signal(including DC) + */ + if (ps_proc->u4_compute_recon != 1) + { + u4_cntrl &= 0x111F0000; + } + + if (u4_cntrl) + { + ih264e_luma_16x16_idctrans_iquant_itrans_recon( + ps_codec, pi2_res_mb, pu1_pseudo_pred, pu1_rec_mb, + i4_res_strd, i4_pseudo_pred_strd, i4_rec_strd, + ps_qp_params->pu2_iscale_mat, + ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div, + u4_cntrl /*Cntrl*/, DISABLE_DC_TRANSFORM, + ps_proc->pv_scratch_buff); + } + else + { + ps_codec->pf_inter_pred_luma_copy(pu1_pseudo_pred, pu1_rec_mb, + i4_pseudo_pred_strd, i4_rec_strd, + MB_SIZE, MB_SIZE, NULL, 0); + } + + + return (u1_cbp_l); +} + +/** +******************************************************************************* +* +* @brief performs chroma core coding 
for inter macro blocks
*
* @par Description:
*  If the current mb is to be coded as an inter predicted mb, prediction is
*  done based on the sub mb partitions and the corresponding motion vectors
*  generated by ME. Then, the error is computed between the input blk and the
*  estimated blk. This error is transformed and quantized. The quantized
*  coefficients are packed in scan order for entropy coding.
*
* @param[in] ps_proc
*  pointer to the process context of the current macro block
*
* @returns
*  coded block pattern of the chroma component
*
* @remarks none
*
*******************************************************************************
*/
UWORD8 ih264e_code_chroma_inter_macroblock_8x8(process_ctxt_t *ps_proc)
{
    /* codec handle and chroma quantization parameters (index 1) */
    codec_t *ps_codec = ps_proc->ps_codec;
    quant_params_t *ps_quant = ps_proc->ps_qp_params[1];

    /* source, prediction, reconstruction and residue buffers */
    UWORD8 *pu1_src = ps_proc->pu1_src_buf_chroma;
    UWORD8 *pu1_pred = ps_proc->pu1_pred_mb;
    UWORD8 *pu1_recon = ps_proc->pu1_rec_buf_chroma;
    WORD16 *pi2_resi = ps_proc->pi2_res_buf;

    /* strides of the buffers above */
    WORD32 i4_src_stride = ps_proc->i4_src_strd;
    WORD32 i4_pred_stride = ps_proc->i4_pred_strd;
    WORD32 i4_recon_stride = ps_proc->i4_rec_strd;
    WORD32 i4_resi_stride = ps_proc->i4_res_strd;

    /* non zero if the input colour format is IV_YUV_420SP_VU, in which */
    /* case the U and V planes are to be swapped for entropy coding     */
    UWORD32 u4_vu_order =
                    (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU);

    /* local non zero coefficient counts, cleared on entry */
    UWORD8 au1_nnz_cnt[10] = {0};

    /* coded block pattern that is returned to the caller */
    UWORD8 u1_chroma_cbp = 0;

    /* control word for the inverse transform; its address is handed */
    /* to ih264e_pack_c_mb() below                                   */
    UWORD32 u4_recon_ctrl;

    /********************************************************/
    /*  prediction                                          */
    /********************************************************/
    ih264e_motion_comp_chroma(ps_proc);

    /********************************************************/
    /*  error estimation, transform, quantization           */
    /********************************************************/
    ih264e_chroma_8x8_resi_trans_dctrans_quant(
                    ps_codec, pu1_src, pu1_pred, pi2_resi,
                    i4_src_stride, i4_pred_stride, i4_resi_stride,
                    ps_quant->pu2_scale_mat, ps_quant->pu2_thres_mat,
                    ps_quant->u1_qbits, ps_quant->u4_dead_zone,
                    au1_nnz_cnt);

    /********************************************************/
    /*  pack coeff data for entropy coding                  */
    /********************************************************/
    ih264e_pack_c_mb(pi2_resi, &(ps_proc->pv_mb_coeff_data), i4_resi_stride,
                     &u1_chroma_cbp, au1_nnz_cnt, ps_codec->u4_thres_resi,
                     &u4_recon_ctrl, u4_vu_order);

    /********************************************************/
    /*  inverse quantization, inverse transform, recon      */
    /********************************************************/

    /*
     * When the frame is neither used as a reference for P frames nor dumped
     * as recon, the recon is only needed for predicting intra mbs. That
     * requires just the right and bottom edge 4x4 blocks, so only those
     * (together with DC) are left enabled in the control word.
     */
    if (ps_proc->u4_compute_recon == 0)
    {
        u4_recon_ctrl &= 0x7700C000;
    }

    if (0 == u4_recon_ctrl)
    {
        /* nothing to inverse transform: hand the prediction to the copy */
        /* routine with the recon buffer as destination; the size        */
        /* arguments are MB_SIZE >> 1 followed by MB_SIZE                */
        ps_codec->pf_inter_pred_luma_copy(pu1_pred, pu1_recon,
                                          i4_pred_stride, i4_recon_stride,
                                          MB_SIZE >> 1, MB_SIZE, NULL, 0);
    }
    else
    {
        ih264e_chroma_8x8_idctrans_iquant_itrans_recon(
                        ps_codec, pi2_resi, pu1_pred, pu1_recon,
                        i4_resi_stride, i4_pred_stride, i4_recon_stride,
                        ps_quant->pu2_iscale_mat, ps_quant->pu2_weigh_mat,
                        ps_quant->u1_qp_div, u4_recon_ctrl,
                        ps_proc->pv_scratch_buff);
    }

    return (u1_chroma_cbp);
}