summaryrefslogtreecommitdiffstats
path: root/common/impeg2_idct.c
diff options
context:
space:
mode:
Diffstat (limited to 'common/impeg2_idct.c')
-rw-r--r--common/impeg2_idct.c500
1 files changed, 500 insertions, 0 deletions
diff --git a/common/impeg2_idct.c b/common/impeg2_idct.c
new file mode 100644
index 0000000..6834260
--- /dev/null
+++ b/common/impeg2_idct.c
@@ -0,0 +1,500 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* */
+/* File Name : impeg2_idct.c */
+/* */
+/* Description : Contains 2d idct and invese quantization functions */
+/* */
+/* List of Functions : impeg2_idct_recon_dc() */
+/* impeg2_idct_recon_dc_mismatch() */
+/* impeg2_idct_recon() */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 10 09 2005 Hairsh M First Version */
+/* */
+/*****************************************************************************/
+/*
+ IEEE - 1180 results for this IDCT
+ L 256 256 5 5 300 300 384 384 Thresholds
+ H 255 255 5 5 300 300 383 383
+ sign 1 -1 1 -1 1 -1 1 -1
+ Peak Error 1 1 1 1 1 1 1 1 1
+ Peak Mean Square Error 0.0191 0.0188 0.0108 0.0111 0.0176 0.0188 0.0165 0.0177 0.06
+ Overall Mean Square Error 0.01566406 0.01597656 0.0091875 0.00908906 0.01499063 0.01533281 0.01432344 0.01412344 0.02
+ Peak Mean Error 0.0027 0.0026 0.0028 0.002 0.0017 0.0033 0.0031 0.0025 0.015
+ Overall Mean Error 0.00002656 -0.00031406 0.00016875 0.00005469 -0.00003125 0.00011406 0.00009219 0.00004219 0.0015
+ */
+#include <stdio.h>
+#include <string.h>
+
+#include "iv_datatypedef.h"
+#include "iv.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+
+#include "impeg2_macros.h"
+#include "impeg2_globals.h"
+#include "impeg2_idct.h"
+
+
+void impeg2_idct_recon_dc(WORD16 *pi2_src,
+ WORD16 *pi2_tmp,
+ UWORD8 *pu1_pred,
+ UWORD8 *pu1_dst,
+ WORD32 i4_src_strd,
+ WORD32 i4_pred_strd,
+ WORD32 i4_dst_strd,
+ WORD32 i4_zero_cols,
+ WORD32 i4_zero_rows)
+{
+ WORD32 i4_val, i, j;
+
+ UNUSED(pi2_tmp);
+ UNUSED(i4_src_strd);
+ UNUSED(i4_zero_cols);
+ UNUSED(i4_zero_rows);
+
+ i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0];
+ i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT);
+ i4_val = i4_val * gai2_impeg2_idct_q11[0];
+ i4_val = ((i4_val + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT);
+
+ for(i = 0; i < TRANS_SIZE_8; i++)
+ {
+ for(j = 0; j < TRANS_SIZE_8; j++)
+ {
+ pu1_dst[j] = CLIP_U8(i4_val + pu1_pred[j]);
+ }
+ pu1_dst += i4_dst_strd;
+ pu1_pred += i4_pred_strd;
+ }
+}
+void impeg2_idct_recon_dc_mismatch(WORD16 *pi2_src,
+ WORD16 *pi2_tmp,
+ UWORD8 *pu1_pred,
+ UWORD8 *pu1_dst,
+ WORD32 i4_src_strd,
+ WORD32 i4_pred_strd,
+ WORD32 i4_dst_strd,
+ WORD32 i4_zero_cols,
+ WORD32 i4_zero_rows)
+
+{
+ WORD32 i4_val, i, j;
+ WORD32 i4_count = 0;
+ WORD32 i4_sum;
+
+ UNUSED(pi2_tmp);
+ UNUSED(i4_src_strd);
+ UNUSED(i4_zero_cols);
+ UNUSED(i4_zero_rows);
+
+ i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0];
+ i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT);
+
+ i4_val *= gai2_impeg2_idct_q11[0];
+ for(i = 0; i < TRANS_SIZE_8; i++)
+ {
+ for (j = 0; j < TRANS_SIZE_8; j++)
+ {
+ i4_sum = i4_val;
+ i4_sum += gai2_impeg2_mismatch_stg2_additive[i4_count];
+ i4_sum = ((i4_sum + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT);
+ i4_sum += pu1_pred[j];
+ pu1_dst[j] = CLIP_U8(i4_sum);
+ i4_count++;
+ }
+
+ pu1_dst += i4_dst_strd;
+ pu1_pred += i4_pred_strd;
+ }
+
+}
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs Inverse transform and reconstruction for 8x8
+ * input block
+ *
+ * @par Description:
+ * Performs inverse transform and adds the prediction data and clips output
+ * to 8 bit
+ *
+ * @param[in] pi2_src
+ * Input 8x8 coefficients
+ *
+ * @param[in] pi2_tmp
+ * Temporary 8x8 buffer for storing inverse
+ *
+ * transform
+ * 1st stage output
+ *
+ * @param[in] pu1_pred
+ * Prediction 8x8 block
+ *
+ * @param[out] pu1_dst
+ * Output 8x8 block
+ *
+ * @param[in] src_strd
+ * Input stride
+ *
+ * @param[in] pred_strd
+ * Prediction stride
+ *
+ * @param[in] dst_strd
+ * Output Stride
+ *
+ * @param[in] shift
+ * Output shift
+ *
+ * @param[in] zero_cols
+ * Zero columns in pi2_src
+ *
+ * @returns Void
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+void impeg2_idct_recon(WORD16 *pi2_src,
+ WORD16 *pi2_tmp,
+ UWORD8 *pu1_pred,
+ UWORD8 *pu1_dst,
+ WORD32 i4_src_strd,
+ WORD32 i4_pred_strd,
+ WORD32 i4_dst_strd,
+ WORD32 i4_zero_cols,
+ WORD32 i4_zero_rows)
+{
+ WORD32 j, k;
+ WORD32 ai4_e[4], ai4_o[4];
+ WORD32 ai4_ee[2], ai4_eo[2];
+ WORD32 i4_add;
+ WORD32 i4_shift;
+ WORD16 *pi2_tmp_orig;
+ WORD32 i4_trans_size;
+ WORD32 i4_zero_rows_2nd_stage = i4_zero_cols;
+ WORD32 i4_row_limit_2nd_stage;
+
+ i4_trans_size = TRANS_SIZE_8;
+
+ pi2_tmp_orig = pi2_tmp;
+
+ if((i4_zero_cols & 0xF0) == 0xF0)
+ i4_row_limit_2nd_stage = 4;
+ else
+ i4_row_limit_2nd_stage = TRANS_SIZE_8;
+
+
+ if((i4_zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */
+ {
+ /************************************************************************************************/
+ /**********************************START - IT_RECON_8x8******************************************/
+ /************************************************************************************************/
+
+ /* Inverse Transform 1st stage */
+ i4_shift = IDCT_STG1_SHIFT;
+ i4_add = 1 << (i4_shift - 1);
+
+ for(j = 0; j < i4_row_limit_2nd_stage; j++)
+ {
+ /* Checking for Zero Cols */
+ if((i4_zero_cols & 1) == 1)
+ {
+ memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16));
+ }
+ else
+ {
+ /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
+ for(k = 0; k < 4; k++)
+ {
+ ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd]
+ + gai2_impeg2_idct_q15[3 * 8 + k]
+ * pi2_src[3 * i4_src_strd];
+ }
+ ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd];
+ ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd];
+ ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0];
+ ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0];
+
+ /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
+ ai4_e[0] = ai4_ee[0] + ai4_eo[0];
+ ai4_e[3] = ai4_ee[0] - ai4_eo[0];
+ ai4_e[1] = ai4_ee[1] + ai4_eo[1];
+ ai4_e[2] = ai4_ee[1] - ai4_eo[1];
+ for(k = 0; k < 4; k++)
+ {
+ pi2_tmp[k] =
+ CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
+ pi2_tmp[k + 4] =
+ CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
+ }
+ }
+ pi2_src++;
+ pi2_tmp += i4_trans_size;
+ i4_zero_cols = i4_zero_cols >> 1;
+ }
+
+ pi2_tmp = pi2_tmp_orig;
+
+ /* Inverse Transform 2nd stage */
+ i4_shift = IDCT_STG2_SHIFT;
+ i4_add = 1 << (i4_shift - 1);
+ if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
+ {
+ for(j = 0; j < i4_trans_size; j++)
+ {
+ /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
+ for(k = 0; k < 4; k++)
+ {
+ ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
+ + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size];
+ }
+ ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size];
+ ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size];
+ ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0];
+ ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0];
+
+ /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
+ ai4_e[0] = ai4_ee[0] + ai4_eo[0];
+ ai4_e[3] = ai4_ee[0] - ai4_eo[0];
+ ai4_e[1] = ai4_ee[1] + ai4_eo[1];
+ ai4_e[2] = ai4_ee[1] - ai4_eo[1];
+ for(k = 0; k < 4; k++)
+ {
+ WORD32 itrans_out;
+ itrans_out =
+ CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
+ pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
+ itrans_out =
+ CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
+ pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
+ }
+ pi2_tmp++;
+ pu1_pred += i4_pred_strd;
+ pu1_dst += i4_dst_strd;
+ }
+ }
+ else /* All rows of output of 1st stage are non-zero */
+ {
+ for(j = 0; j < i4_trans_size; j++)
+ {
+ /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
+ for(k = 0; k < 4; k++)
+ {
+ ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
+ + gai2_impeg2_idct_q11[3 * 8 + k]
+ * pi2_tmp[3 * i4_trans_size]
+ + gai2_impeg2_idct_q11[5 * 8 + k]
+ * pi2_tmp[5 * i4_trans_size]
+ + gai2_impeg2_idct_q11[7 * 8 + k]
+ * pi2_tmp[7 * i4_trans_size];
+ }
+
+ ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]
+ + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size];
+ ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]
+ + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size];
+ ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]
+ + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size];
+ ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]
+ + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size];
+
+ /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
+ ai4_e[0] = ai4_ee[0] + ai4_eo[0];
+ ai4_e[3] = ai4_ee[0] - ai4_eo[0];
+ ai4_e[1] = ai4_ee[1] + ai4_eo[1];
+ ai4_e[2] = ai4_ee[1] - ai4_eo[1];
+ for(k = 0; k < 4; k++)
+ {
+ WORD32 itrans_out;
+ itrans_out =
+ CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
+ pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
+ itrans_out =
+ CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
+ pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
+ }
+ pi2_tmp++;
+ pu1_pred += i4_pred_strd;
+ pu1_dst += i4_dst_strd;
+ }
+ }
+ /************************************************************************************************/
+ /************************************END - IT_RECON_8x8******************************************/
+ /************************************************************************************************/
+ }
+ else /* All rows of input are non-zero */
+ {
+ /************************************************************************************************/
+ /**********************************START - IT_RECON_8x8******************************************/
+ /************************************************************************************************/
+
+ /* Inverse Transform 1st stage */
+ i4_shift = IDCT_STG1_SHIFT;
+ i4_add = 1 << (i4_shift - 1);
+
+ for(j = 0; j < i4_row_limit_2nd_stage; j++)
+ {
+ /* Checking for Zero Cols */
+ if((i4_zero_cols & 1) == 1)
+ {
+ memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16));
+ }
+ else
+ {
+ /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
+ for(k = 0; k < 4; k++)
+ {
+ ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd]
+ + gai2_impeg2_idct_q15[3 * 8 + k]
+ * pi2_src[3 * i4_src_strd]
+ + gai2_impeg2_idct_q15[5 * 8 + k]
+ * pi2_src[5 * i4_src_strd]
+ + gai2_impeg2_idct_q15[7 * 8 + k]
+ * pi2_src[7 * i4_src_strd];
+ }
+
+ ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd]
+ + gai2_impeg2_idct_q15[6 * 8 + 0] * pi2_src[6 * i4_src_strd];
+ ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd]
+ + gai2_impeg2_idct_q15[6 * 8 + 1] * pi2_src[6 * i4_src_strd];
+ ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0]
+ + gai2_impeg2_idct_q15[4 * 8 + 0] * pi2_src[4 * i4_src_strd];
+ ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0]
+ + gai2_impeg2_idct_q15[4 * 8 + 1] * pi2_src[4 * i4_src_strd];
+
+ /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
+ ai4_e[0] = ai4_ee[0] + ai4_eo[0];
+ ai4_e[3] = ai4_ee[0] - ai4_eo[0];
+ ai4_e[1] = ai4_ee[1] + ai4_eo[1];
+ ai4_e[2] = ai4_ee[1] - ai4_eo[1];
+ for(k = 0; k < 4; k++)
+ {
+ pi2_tmp[k] =
+ CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
+ pi2_tmp[k + 4] =
+ CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
+ }
+ }
+ pi2_src++;
+ pi2_tmp += i4_trans_size;
+ i4_zero_cols = i4_zero_cols >> 1;
+ }
+
+ pi2_tmp = pi2_tmp_orig;
+
+ /* Inverse Transform 2nd stage */
+ i4_shift = IDCT_STG2_SHIFT;
+ i4_add = 1 << (i4_shift - 1);
+ if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
+ {
+ for(j = 0; j < i4_trans_size; j++)
+ {
+ /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
+ for(k = 0; k < 4; k++)
+ {
+ ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
+ + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size];
+ }
+ ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size];
+ ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size];
+ ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0];
+ ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0];
+
+ /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
+ ai4_e[0] = ai4_ee[0] + ai4_eo[0];
+ ai4_e[3] = ai4_ee[0] - ai4_eo[0];
+ ai4_e[1] = ai4_ee[1] + ai4_eo[1];
+ ai4_e[2] = ai4_ee[1] - ai4_eo[1];
+ for(k = 0; k < 4; k++)
+ {
+ WORD32 itrans_out;
+ itrans_out =
+ CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
+ pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
+ itrans_out =
+ CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
+ pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
+ }
+ pi2_tmp++;
+ pu1_pred += i4_pred_strd;
+ pu1_dst += i4_dst_strd;
+ }
+ }
+ else /* All rows of output of 1st stage are non-zero */
+ {
+ for(j = 0; j < i4_trans_size; j++)
+ {
+ /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
+ for(k = 0; k < 4; k++)
+ {
+ ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
+ + gai2_impeg2_idct_q11[3 * 8 + k]
+ * pi2_tmp[3 * i4_trans_size]
+ + gai2_impeg2_idct_q11[5 * 8 + k]
+ * pi2_tmp[5 * i4_trans_size]
+ + gai2_impeg2_idct_q11[7 * 8 + k]
+ * pi2_tmp[7 * i4_trans_size];
+ }
+
+ ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]
+ + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size];
+ ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]
+ + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size];
+ ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]
+ + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size];
+ ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]
+ + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size];
+
+ /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
+ ai4_e[0] = ai4_ee[0] + ai4_eo[0];
+ ai4_e[3] = ai4_ee[0] - ai4_eo[0];
+ ai4_e[1] = ai4_ee[1] + ai4_eo[1];
+ ai4_e[2] = ai4_ee[1] - ai4_eo[1];
+ for(k = 0; k < 4; k++)
+ {
+ WORD32 itrans_out;
+ itrans_out =
+ CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
+ pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
+ itrans_out =
+ CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
+ pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
+ }
+ pi2_tmp++;
+ pu1_pred += i4_pred_strd;
+ pu1_dst += i4_dst_strd;
+ }
+ }
+ /************************************************************************************************/
+ /************************************END - IT_RECON_8x8******************************************/
+ /************************************************************************************************/
+ }
+}
+