Initial version

Change-Id: I7efe9a589cd24edf86e8d086b40c27cbbf8b4017
author: Hamsalekha S <hamsalekha.s@ittiam.com> 2015-03-13 21:24:58 +0530
committer: Hamsalekha S <hamsalekha.s@ittiam.com> 2015-04-02 15:59:02 +0530
commit: 8d3d303c7942ced6a987a52db8977d768dc3605f (patch)
tree: cc806c96794356996b13ba9970941d0aed74a97e /common/ih264_resi_trans_quant.c
parent: 3956d913d37327dcb340f836e604b04bd478b158 (diff)
download: android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.tar.gz
android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.tar.bz2
android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.zip
1 files changed, 814 insertions, 0 deletions
diff --git a/common/ih264_resi_trans_quant.c b/common/ih264_resi_trans_quant.c
new file mode 100755
index 0000000..cf1d43c
--- /dev/null
+++ b/common/ih264_resi_trans_quant.c
@@ -0,0 +1,814 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ *  ih264_resi_trans_quant.c
+ *
+ * @brief
+ *  Contains function definitions for single stage forward transform for H.264.
+ *  The functions calculate the residue, do the cf and then do the quantization
+ *
+ * @author
+ *  Ittiam
+ *
+ * @par List of Functions:
+ *  - ih264_resi_trans_quant_4x4()
+ *  - ih264_resi_trans_quant_chroma_4x4
+ *  - ih264_hadamard_quant_4x4
+ *  - ih264_hadamard_quant_2x2_uv
+ *  - ih264_resi_trans_quant_8x8
+ *
+ * @remarks
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stddef.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264_macros.h"
+#include "ih264_trans_macros.h"
+#include "ih264_trans_data.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *   Residue computation, forward transform and quantization of a 4x4 block
+ *
+ * @par Description:
+ *   The residue (source - prediction) is computed row by row and transformed
+ *   horizontally into pi2_out. A second pass transforms each column of pi2_out
+ *   vertically and quantizes the result in place with FWD_QUANT. Coefficients
+ *   are stored contiguously, i.e. with a row stride of 4.
+ *
+ * @param[in] pu1_src
+ *   Pointer to source sub-block
+ *
+ * @param[in] pu1_pred
+ *   Pointer to prediction sub-block
+ *
+ * @param[out] pi2_out
+ *   Pointer to the 16 quantized coefficients; also holds intermediate results
+ *
+ * @param[in] src_strd
+ *   Source stride
+ *
+ * @param[in] pred_strd
+ *   Prediction stride
+ *
+ * @param[in] pu2_scale_matrix
+ *   Pointer to Forward Quant Scale Matrix
+ *
+ * @param[in] pu2_threshold_matrix
+ *   Pointer to Forward Quant Threshold Matrix
+ *
+ * @param[in] u4_qbits
+ *    QP_BITS_h264_4x4 + floor(QP/6)
+ *
+ * @param[in] u4_round_factor
+ *   Quantization Round factor
+ *
+ * @param[out] pu1_nnz
+ *   Total non-zero coefficients in the current sub-block
+ *
+ * @param[out] pi2_alt_dc_addr
+ *   Receives the DC coefficient (row 0, column 0) before it is quantized
+ *
+ * @returns  Void
+ *
+ * @remarks
+ *   Doubling is written as * 2: left shifting a negative value is undefined
+ *
+ *******************************************************************************
+ */
+void ih264_resi_trans_quant_4x4(UWORD8 *pu1_src,
+                                UWORD8 *pu1_pred,
+                                WORD16 *pi2_out,
+                                WORD32 src_strd,
+                                WORD32 pred_strd,
+                                const UWORD16 *pu2_scale_matrix,
+                                const UWORD16 *pu2_threshold_matrix,
+                                UWORD32 u4_qbits,
+                                UWORD32 u4_round_factor,
+                                UWORD8 *pu1_nnz,
+                                WORD16 *pi2_alt_dc_addr)
+{
+    UWORD32 i;
+    WORD32  x0, x1, x2, x3, x4, x5, x6, x7;
+    WORD32  i4_value, i4_sign;
+    UWORD32 u4_abs_value;
+    WORD16  *pi2_out_tmp = pi2_out;
+    UWORD32 u4_nonzero_coeff = 0;
+
+    for (i = 0; i < SUB_BLK_WIDTH_4x4; i++)
+    {
+        /* computing prediction error (residue) */
+        x4 = pu1_src[0] - pu1_pred[0];
+        x5 = pu1_src[1] - pu1_pred[1];
+        x6 = pu1_src[2] - pu1_pred[2];
+        x7 = pu1_src[3] - pu1_pred[3];
+
+        /* Horizontal transform */
+        x0 = x4 + x7;
+        x1 = x5 + x6;
+        x2 = x5 - x6;
+        x3 = x4 - x7;
+
+        pi2_out_tmp[0] = x0 + x1;
+        pi2_out_tmp[1] = (x3 * 2) + x2; /* not << 1: x3 can be negative */
+        pi2_out_tmp[2] = x0 - x1;
+        pi2_out_tmp[3] = x3 - (x2 * 2); /* not << 1: x2 can be negative */
+
+        /* advance to the next row */
+        pu1_src += src_strd;
+        pu1_pred += pred_strd;
+        pi2_out_tmp += 4;
+
+    }
+    pi2_out_tmp = pi2_out;
+    for (i = 0; i < SUB_BLK_WIDTH_4x4; i++)
+    {
+
+        /* Vertical transform and quantization of column i */
+        x4 = pi2_out_tmp[0];
+        x5 = pi2_out_tmp[4];
+        x6 = pi2_out_tmp[8];
+        x7 = pi2_out_tmp[12];
+
+
+        x0 = x4 + x7;
+        x1 = x5 + x6;
+        x2 = x5 - x6;
+        x3 = x4 - x7;
+
+        /* quantization is done in place */
+
+        i4_value = x0 + x1;
+        /* first column: hand the unquantized DC coefficient to the caller */
+        if(i==0)
+        {
+          (*pi2_alt_dc_addr) = i4_value;
+        }
+
+        FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, u4_nonzero_coeff);
+        pi2_out_tmp[0] = i4_value;
+
+
+        i4_value = (x3 * 2) + x2; /* not << 1: x3 can be negative */
+        FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[4], pu2_scale_matrix[4], u4_round_factor, u4_qbits, u4_nonzero_coeff);
+        pi2_out_tmp[4] = i4_value;
+
+
+        i4_value = x0 - x1;
+        FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits, u4_nonzero_coeff);
+        pi2_out_tmp[8] = i4_value;
+
+
+        i4_value = x3 - (x2 * 2); /* not << 1: x2 can be negative */
+        FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[12], pu2_scale_matrix[12], u4_round_factor, u4_qbits, u4_nonzero_coeff);
+        pi2_out_tmp[12] = i4_value;
+
+        pi2_out_tmp ++;
+        pu2_scale_matrix++;
+        pu2_threshold_matrix++;
+    }
+
+    /* Return total nonzero coefficients in the current sub block */
+    *pu1_nnz =  u4_nonzero_coeff;
+}
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *   Residue computation, forward transform and quantization of a 4x4 chroma
+ *   block that is stored interleaved (every second sample is used)
+ *
+ * @par Description:
+ *   Each row contributes the samples at offsets 0, 2, 4 and 6 of pu1_src and
+ *   pu1_pred. Their residue is transformed horizontally into pi2_out; a second
+ *   pass transforms each column vertically and quantizes it in place with
+ *   FWD_QUANT. Coefficients are stored contiguously (row stride of 4).
+ *
+ * @param[in] pu1_src
+ *   Pointer to source sub-block
+ *
+ * @param[in] pu1_pred
+ *   Pointer to prediction sub-block
+ *
+ * @param[out] pi2_out
+ *   Pointer to the 16 quantized coefficients; also holds intermediate results
+ *
+ * @param[in] src_strd
+ *   Source stride
+ *
+ * @param[in] pred_strd
+ *   Prediction stride
+ *
+ * @param[in] pu2_scale_matrix
+ *   Pointer to Forward Quant Scale Matrix
+ *
+ * @param[in] pu2_threshold_matrix
+ *   Pointer to Forward Quant Threshold Matrix
+ *
+ * @param[in] u4_qbits
+ *    QP_BITS_h264_4x4 + floor(QP/6)
+ *
+ * @param[in] u4_round_factor
+ *   Quantization Round factor
+ *
+ * @param[out] pu1_nnz
+ *   Total non-zero coefficients in the current sub-block
+ *
+ * @param[out] pu1_dc_alt_addr
+ *   Receives the DC coefficient (row 0, column 0) before it is quantized
+ *
+ * @returns  Void
+ *
+ * @remarks
+ *   Doubling is written as * 2: left shifting a negative value is undefined
+ *
+ *******************************************************************************
+ */
+void ih264_resi_trans_quant_chroma_4x4(UWORD8 *pu1_src,
+                                       UWORD8 *pu1_pred,
+                                       WORD16 *pi2_out,
+                                       WORD32 src_strd,
+                                       WORD32 pred_strd,
+                                       const UWORD16 *pu2_scale_matrix,
+                                       const UWORD16 *pu2_threshold_matrix,
+                                       UWORD32 u4_qbits,
+                                       UWORD32 u4_round_factor,
+                                       UWORD8 *pu1_nnz,
+                                       WORD16 *pu1_dc_alt_addr)
+{
+    UWORD32 i;
+    WORD32  x0, x1, x2, x3, x4, x5, x6, x7;
+    WORD32  i4_value, i4_sign;
+    UWORD32 u4_abs_value;
+    WORD16  *pi2_out_tmp = pi2_out;
+    UWORD32 u4_nonzero_coeff = 0;
+
+    for (i = 0; i < SUB_BLK_WIDTH_4x4; i++)
+    {
+        /* computing prediction error (residue) from every second sample */
+        x4 = pu1_src[0] - pu1_pred[0];
+        x5 = pu1_src[2] - pu1_pred[2];
+        x6 = pu1_src[4] - pu1_pred[4];
+        x7 = pu1_src[6] - pu1_pred[6];
+
+        /* Horizontal transform */
+        x0 = x4 + x7;
+        x1 = x5 + x6;
+        x2 = x5 - x6;
+        x3 = x4 - x7;
+
+        pi2_out_tmp[0] = x0 + x1;
+        pi2_out_tmp[1] = (x3 * 2) + x2; /* not << 1: x3 can be negative */
+        pi2_out_tmp[2] = x0 - x1;
+        pi2_out_tmp[3] = x3 - (x2 * 2); /* not << 1: x2 can be negative */
+
+        /* advance to the next row */
+        pu1_src += src_strd;
+        pu1_pred += pred_strd;
+        pi2_out_tmp += 4;
+
+    }
+    pi2_out_tmp = pi2_out;
+    for (i = 0; i < SUB_BLK_WIDTH_4x4; i++)
+    {
+
+        /* Vertical transform and quantization of column i */
+        x4 = pi2_out_tmp[0];
+        x5 = pi2_out_tmp[4];
+        x6 = pi2_out_tmp[8];
+        x7 = pi2_out_tmp[12];
+
+
+        x0 = x4 + x7;
+        x1 = x5 + x6;
+        x2 = x5 - x6;
+        x3 = x4 - x7;
+
+        /* quantization is done in place */
+
+        i4_value = x0 + x1;
+        /* first column: hand the unquantized DC coefficient to the caller */
+        if(i==0)
+        {
+          *pu1_dc_alt_addr = i4_value;
+        }
+
+        FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+                  pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+                  u4_nonzero_coeff);
+        pi2_out_tmp[0] = i4_value;
+
+        i4_value = (x3 * 2) + x2; /* not << 1: x3 can be negative */
+        FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[4],
+                  pu2_scale_matrix[4], u4_round_factor, u4_qbits,
+                  u4_nonzero_coeff);
+        pi2_out_tmp[4] = i4_value;
+
+        i4_value = x0 - x1;
+        FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[8],
+                  pu2_scale_matrix[8], u4_round_factor, u4_qbits,
+                  u4_nonzero_coeff);
+        pi2_out_tmp[8] = i4_value;
+
+        i4_value = x3 - (x2 * 2); /* not << 1: x2 can be negative */
+        FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[12],
+                  pu2_scale_matrix[12], u4_round_factor, u4_qbits,
+                  u4_nonzero_coeff);
+        pi2_out_tmp[12] = i4_value;
+
+        pi2_out_tmp ++;
+        pu2_scale_matrix++;
+        pu2_threshold_matrix++;
+    }
+
+    /* Return total nonzero coefficients in the current sub block */
+    *pu1_nnz =  u4_nonzero_coeff;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *   Forward 4x4 Hadamard transform followed by quantization
+ *
+ * @par Description:
+ *   Every row of pi2_src is passed through a 4-point Hadamard butterfly and
+ *   written to pi2_dst. Every column of pi2_dst is then transformed the same
+ *   way, halved with a right shift by one and quantized in place with
+ *   FWD_QUANT. Entry 0 of the scale and threshold matrices is applied to all
+ *   16 coefficients.
+ *
+ *   For a row or column (a, b, c, d) the butterfly yields
+ *     (a + b + c + d,  a + b - c - d,  a - b - c + d,  a - b + c - d).
+ *
+ * @param[in] pi2_src
+ *   Pointer to the 16 input values, read as 4 rows of 4
+ *
+ * @param[out] pi2_dst
+ *   Pointer to the 16 output coefficients, written as 4 rows of 4. It also
+ *   holds the result of the horizontal pass until the vertical pass runs.
+ *
+ * @param[in] pu2_scale_matrix
+ *   Pointer to Forward Quant Scale Matrix; only entry 0 is used
+ *
+ * @param[in] pu2_threshold_matrix
+ *   Pointer to Forward Quant Threshold Matrix; only entry 0 is used
+ *
+ * @param[in] u4_qbits
+ *   Passed unchanged to FWD_QUANT as its qbits argument
+ *
+ * @param[in] u4_round_factor
+ *   Quantization Round factor
+ *
+ * @param[out] pu1_nnz
+ *   Total non-zero coefficients in the block. The location is cleared on entry
+ *   and passed to FWD_QUANT as its counter for every coefficient.
+ *
+ * @returns  Void
+ *
+ * @remarks
+ *   - Neither buffer takes a stride: both are contiguous 4x4 arrays.
+ *   - Each row is read completely before the same row is written.
+ *     NOTE(review): in-place use (pi2_src == pi2_dst) looks safe - confirm
+ *     against the callers.
+ *   - The halving applies >> to values that may be negative. C leaves that
+ *     implementation-defined; an arithmetic shift is presumably expected.
+ *
+ *******************************************************************************
+ */
+void ih264_hadamard_quant_4x4(WORD16 *pi2_src,
+                              WORD16 *pi2_dst,
+                              const UWORD16 *pu2_scale_matrix,
+                              const UWORD16 *pu2_threshold_matrix,
+                              UWORD32 u4_qbits,
+                              UWORD32 u4_round_factor,
+                              UWORD8 *pu1_nnz)
+{
+    WORD32 row, col;
+    WORD32 in0, in1, in2, in3;
+    WORD32 sum_outer, sum_inner, diff_inner, diff_outer;
+    WORD32 i4_coeff, i4_sign;
+    UWORD32 u4_abs_value;
+    WORD16 *pi2_row = pi2_dst;
+    WORD16 *pi2_col = pi2_dst;
+
+    *pu1_nnz = 0;
+
+    /* Horizontal pass: one 4-point Hadamard butterfly per row */
+    for (row = 0; row < SUB_BLK_WIDTH_4x4; row++)
+    {
+        in0 = pi2_src[0];
+        in1 = pi2_src[1];
+        in2 = pi2_src[2];
+        in3 = pi2_src[3];
+
+        sum_outer = in0 + in3;
+        sum_inner = in1 + in2;
+        diff_inner = in1 - in2;
+        diff_outer = in0 - in3;
+
+        pi2_row[0] = sum_outer + sum_inner;
+        pi2_row[1] = diff_outer + diff_inner;
+        pi2_row[2] = sum_outer - sum_inner;
+        pi2_row[3] = diff_outer - diff_inner;
+
+        pi2_src += 4;
+        pi2_row += 4;
+    }
+
+    /* Vertical pass: butterfly per column, halve, then quantize in place */
+    for (col = 0; col < SUB_BLK_WIDTH_4x4; col++)
+    {
+        in0 = pi2_col[0];
+        in1 = pi2_col[4];
+        in2 = pi2_col[8];
+        in3 = pi2_col[12];
+
+        sum_outer = in0 + in3;
+        sum_inner = in1 + in2;
+        diff_inner = in1 - in2;
+        diff_outer = in0 - in3;
+
+        i4_coeff = (sum_outer + sum_inner) >> 1;
+        FWD_QUANT(i4_coeff, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+                  pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]);
+        pi2_col[0] = i4_coeff;
+
+        i4_coeff = (diff_outer + diff_inner) >> 1;
+        FWD_QUANT(i4_coeff, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+                  pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]);
+        pi2_col[4] = i4_coeff;
+
+        i4_coeff = (sum_outer - sum_inner) >> 1;
+        FWD_QUANT(i4_coeff, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+                  pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]);
+        pi2_col[8] = i4_coeff;
+
+        i4_coeff = (diff_outer - diff_inner) >> 1;
+        FWD_QUANT(i4_coeff, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+                  pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]);
+        pi2_col[12] = i4_coeff;
+
+        pi2_col++;
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *   Forward 2x2 Hadamard transform and quantization for two planes (U and V)
+ *
+ * @par Description:
+ *   pi2_src holds four values per plane, plane 0 first. For each plane the 2x2
+ *   Hadamard transform of its four values is computed and every result is
+ *   quantized with FWD_QUANT, always using entry 0 of the scale and threshold
+ *   matrices. The four results of plane p are written to pi2_dst[4 * p + k].
+ *
+ *   With the inputs of a plane named a, b, c, d and Q() denoting FWD_QUANT:
+ *     dst[0] = Q(a + b + c + d)      dst[1] = Q(a - b + c - d)
+ *     dst[2] = Q(a + b - c - d)      dst[3] = Q(a - b - c + d)
+ *
+ * @param[in] pi2_src
+ *   Pointer to the 8 input values (4 per plane)
+ *
+ * @param[out] pi2_dst
+ *   Pointer to the 8 quantized coefficients (4 per plane)
+ *
+ * @param[in] pu2_scale_matrix
+ *   Pointer to Forward Quant Scale Matrix; only entry 0 is used
+ *
+ * @param[in] pu2_threshold_matrix
+ *   Pointer to Forward Quant Threshold Matrix; only entry 0 is used
+ *
+ * @param[in] u4_qbits
+ *   Passed unchanged to FWD_QUANT as its qbits argument
+ *
+ * @param[in] u4_round_factor
+ *   Quantization Round factor
+ *
+ * @param[out] pu1_nnz
+ *   Non-zero coefficient count per plane: pu1_nnz[0] for plane 0 and
+ *   pu1_nnz[1] for plane 1. Each entry is cleared before its plane is
+ *   processed.
+ *
+ * @returns  Void
+ *
+ * @remarks
+ *   - All four inputs of a plane are read before any of its outputs is
+ *     written.
+ *   - NOTE(review): the counts land in pu1_nnz[0] and pu1_nnz[1]; an earlier
+ *     note spoke of positions 0 and 5 - confirm what the callers expect.
+ *   - NOTE(review): which plane is U and which is V is not visible here.
+ *
+ *******************************************************************************
+ */
+void ih264_hadamard_quant_2x2_uv(WORD16 *pi2_src,
+                                 WORD16 *pi2_dst,
+                                 const UWORD16 *pu2_scale_matrix,
+                                 const UWORD16 *pu2_threshold_matrix,
+                                 UWORD32 u4_qbits,
+                                 UWORD32 u4_round_factor,
+                                 UWORD8 *pu1_nnz)
+{
+    WORD32 plane;
+    WORD32 in0, in1, in2, in3;
+    WORD32 row0_sum, row0_diff, row1_sum, row1_diff;
+    WORD32 i4_coeff, i4_sign;
+    UWORD32 u4_abs_value;
+    WORD16 *pi2_in, *pi2_quant;
+
+    for (plane = 0; plane < 2; plane++)
+    {
+        /* each plane owns four consecutive values in both buffers */
+        pi2_in = pi2_src + (plane * 4);
+        pi2_quant = pi2_dst + (plane * 4);
+
+        pu1_nnz[plane] = 0;
+
+        /* Horizontal transform */
+        in0 = pi2_in[0];
+        in1 = pi2_in[1];
+        in2 = pi2_in[2];
+        in3 = pi2_in[3];
+
+        row0_sum = in0 + in1;
+        row0_diff = in0 - in1;
+        row1_sum = in2 + in3;
+        row1_diff = in2 - in3;
+
+        /* Vertical transform and quantization; stored in order 0, 2, 3, 1 */
+        i4_coeff = row0_sum + row1_sum;
+        FWD_QUANT(i4_coeff, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+                  pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+                  pu1_nnz[plane]);
+        pi2_quant[0] = i4_coeff;
+
+        i4_coeff = row0_sum - row1_sum;
+        FWD_QUANT(i4_coeff, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+                  pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+                  pu1_nnz[plane]);
+        pi2_quant[2] = i4_coeff;
+
+        i4_coeff = row0_diff - row1_diff;
+        FWD_QUANT(i4_coeff, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+                  pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+                  pu1_nnz[plane]);
+        pi2_quant[3] = i4_coeff;
+
+        i4_coeff = row0_diff + row1_diff;
+        FWD_QUANT(i4_coeff, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+                  pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+                  pu1_nnz[plane]);
+        pi2_quant[1] = i4_coeff;
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *   Residue computation, single stage 8x8 forward transform (CF8) and
+ *   quantization for H.264
+ *
+ * @par Description:
+ *   The residue (source - prediction) of every row is transformed
+ *   horizontally into pi2_out. Every column of pi2_out is then transformed
+ *   vertically and each coefficient is quantized in place with FWD_QUANT.
+ *   Coefficients are stored contiguously, i.e. with a row stride of 8.
+ *
+ * @param[in] pu1_src
+ *   Pointer to the source 8x8 pixels
+ *
+ * @param[in] pu1_pred
+ *   Pointer to the prediction 8x8 pixels
+ *
+ * @param[out] pi2_out
+ *   Pointer to the 64 quantized coefficients; also holds intermediate results
+ *
+ * @param[in] src_strd
+ *   Source stride
+ *
+ * @param[in] pred_strd
+ *   Prediction stride
+ *
+ * @param[in] pu2_scale_matrix
+ *   Pointer to Forward Quant Scale Matrix (64 entries, laid out like pi2_out)
+ *
+ * @param[in] pu2_threshold_matrix
+ *   Pointer to Forward Quant Threshold Matrix (64 entries, same layout)
+ *
+ * @param[in] u4_qbits
+ *   Passed unchanged to FWD_QUANT as its qbits argument
+ *
+ * @param[in] u4_round_factor
+ *   Quantization Round factor
+ *
+ * @param[out] pu1_nnz
+ *   Total non-zero coefficients in the current block
+ *
+ * @param[in] pu1_dc_alt_addr
+ *   Unused
+ *
+ * @returns  Void
+ *
+ *******************************************************************************
+ */
+void ih264_resi_trans_quant_8x8(UWORD8 *pu1_src,
+                                UWORD8 *pu1_pred,
+                                WORD16 *pi2_out,
+                                WORD32 src_strd,
+                                WORD32 pred_strd,
+                                const UWORD16 *pu2_scale_matrix,
+                                const UWORD16 *pu2_threshold_matrix,
+                                UWORD32 u4_qbits,
+                                UWORD32 u4_round_factor,
+                                UWORD8 *pu1_nnz,
+                                WORD16 *pu1_dc_alt_addr)
+{
+    UWORD32 row, col;
+    WORD32 d0, d1, d2, d3, d4, d5, d6, d7;  /* inputs of one 1-D transform */
+    WORD32 s0, s1, s2, s3;                  /* d[k] + d[7 - k] */
+    WORD32 t0, t1, t2, t3;                  /* d[k] - d[7 - k] */
+    WORD32 e0, e1, e2, e3;                  /* feed outputs 0, 2, 4, 6 */
+    WORD32 o0, o1, o2, o3;                  /* feed outputs 1, 3, 5, 7 */
+    WORD32 c0, c1, c2, c3, c4, c5, c6, c7;  /* one transformed column */
+    WORD32 i4_sign;
+    UWORD32 u4_abs_value;
+    UWORD32 u4_nnz_count = 0;
+    WORD16 *pi2_row = pi2_out;
+    WORD16 *pi2_col = pi2_out;
+
+    UNUSED(pu1_dc_alt_addr);
+
+    /* Pass 1: residue of each row and its horizontal 8-point transform */
+    for (row = 0; row < SUB_BLK_WIDTH_8x8; row++)
+    {
+        d0 = pu1_src[0] - pu1_pred[0];
+        d1 = pu1_src[1] - pu1_pred[1];
+        d2 = pu1_src[2] - pu1_pred[2];
+        d3 = pu1_src[3] - pu1_pred[3];
+        d4 = pu1_src[4] - pu1_pred[4];
+        d5 = pu1_src[5] - pu1_pred[5];
+        d6 = pu1_src[6] - pu1_pred[6];
+        d7 = pu1_src[7] - pu1_pred[7];
+
+        s0 = d0 + d7;
+        s1 = d1 + d6;
+        s2 = d2 + d5;
+        s3 = d3 + d4;
+
+        e0 = s0 + s3;
+        e1 = s1 + s2;
+        e2 = s0 - s3;
+        e3 = s1 - s2;
+
+        pi2_row[0] = e0 + e1;
+        pi2_row[2] = e2 + (e3 >> 1);
+        pi2_row[4] = e0 - e1;
+        pi2_row[6] = (e2 >> 1) - e3;
+
+        t0 = d0 - d7;
+        t1 = d1 - d6;
+        t2 = d2 - d5;
+        t3 = d3 - d4;
+
+        o0 = t1 + t2 + ((t0 >> 1) + t0);
+        o1 = t0 - t3 - ((t2 >> 1) + t2);
+        o2 = t0 + t3 - ((t1 >> 1) + t1);
+        o3 = t1 - t2 + ((t3 >> 1) + t3);
+
+        pi2_row[1] = o0 + (o3 >> 2);
+        pi2_row[3] = o1 + (o2 >> 2);
+        pi2_row[5] = o2 - (o1 >> 2);
+        pi2_row[7] = (o0 >> 2) - o3;
+
+        pu1_src += src_strd;
+        pu1_pred += pred_strd;
+        pi2_row += 8;
+    }
+
+    /* Pass 2: vertical 8-point transform of each column, quantized in place */
+    for (col = 0; col < SUB_BLK_WIDTH_8x8; col++)
+    {
+        d0 = pi2_col[0];
+        d1 = pi2_col[8];
+        d2 = pi2_col[16];
+        d3 = pi2_col[24];
+        d4 = pi2_col[32];
+        d5 = pi2_col[40];
+        d6 = pi2_col[48];
+        d7 = pi2_col[56];
+
+        s0 = d0 + d7;
+        s1 = d1 + d6;
+        s2 = d2 + d5;
+        s3 = d3 + d4;
+
+        e0 = s0 + s3;
+        e1 = s1 + s2;
+        e2 = s0 - s3;
+        e3 = s1 - s2;
+
+        c0 = e0 + e1;
+        c2 = e2 + (e3 >> 1);
+        c4 = e0 - e1;
+        c6 = (e2 >> 1) - e3;
+
+        t0 = d0 - d7;
+        t1 = d1 - d6;
+        t2 = d2 - d5;
+        t3 = d3 - d4;
+
+        o0 = t1 + t2 + ((t0 >> 1) + t0);
+        o1 = t0 - t3 - ((t2 >> 1) + t2);
+        o2 = t0 + t3 - ((t1 >> 1) + t1);
+        o3 = t1 - t2 + ((t3 >> 1) + t3);
+
+        c1 = o0 + (o3 >> 2);
+        c3 = o1 + (o2 >> 2);
+        c5 = o2 - (o1 >> 2);
+        c7 = (o0 >> 2) - o3;
+
+        /* row k of this column uses entry 8 * k of the advancing matrices */
+        FWD_QUANT(c0, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+                  pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+                  u4_nnz_count);
+        pi2_col[0] = c0;
+
+        FWD_QUANT(c1, u4_abs_value, i4_sign, pu2_threshold_matrix[8],
+                  pu2_scale_matrix[8], u4_round_factor, u4_qbits,
+                  u4_nnz_count);
+        pi2_col[8] = c1;
+
+        FWD_QUANT(c2, u4_abs_value, i4_sign, pu2_threshold_matrix[16],
+                  pu2_scale_matrix[16], u4_round_factor, u4_qbits,
+                  u4_nnz_count);
+        pi2_col[16] = c2;
+
+        FWD_QUANT(c3, u4_abs_value, i4_sign, pu2_threshold_matrix[24],
+                  pu2_scale_matrix[24], u4_round_factor, u4_qbits,
+                  u4_nnz_count);
+        pi2_col[24] = c3;
+
+        FWD_QUANT(c4, u4_abs_value, i4_sign, pu2_threshold_matrix[32],
+                  pu2_scale_matrix[32], u4_round_factor, u4_qbits,
+                  u4_nnz_count);
+        pi2_col[32] = c4;
+
+        FWD_QUANT(c5, u4_abs_value, i4_sign, pu2_threshold_matrix[40],
+                  pu2_scale_matrix[40], u4_round_factor, u4_qbits,
+                  u4_nnz_count);
+        pi2_col[40] = c5;
+
+        FWD_QUANT(c6, u4_abs_value, i4_sign, pu2_threshold_matrix[48],
+                  pu2_scale_matrix[48], u4_round_factor, u4_qbits,
+                  u4_nnz_count);
+        pi2_col[48] = c6;
+
+        FWD_QUANT(c7, u4_abs_value, i4_sign, pu2_threshold_matrix[56],
+                  pu2_scale_matrix[56], u4_round_factor, u4_qbits,
+                  u4_nnz_count);
+        pi2_col[56] = c7;
+
+        pi2_col++;
+        pu2_scale_matrix++;
+        pu2_threshold_matrix++;
+    }
+
+    /* Return total nonzero coefficients in the current sub block */
+    *pu1_nnz = u4_nnz_count;
+}
author	Hamsalekha S <hamsalekha.s@ittiam.com>	2015-03-13 21:24:58 +0530
committer	Hamsalekha S <hamsalekha.s@ittiam.com>	2015-04-02 15:59:02 +0530
commit	8d3d303c7942ced6a987a52db8977d768dc3605f (patch)
tree	cc806c96794356996b13ba9970941d0aed74a97e /common/ih264_resi_trans_quant.c
parent	3956d913d37327dcb340f836e604b04bd478b158 (diff)
download	android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.tar.gz android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.tar.bz2 android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.zip