diff options
Diffstat (limited to 'libvpx/vp9/encoder/vp9_dct.c')
-rw-r--r-- | libvpx/vp9/encoder/vp9_dct.c | 479 |
1 files changed, 235 insertions, 244 deletions
diff --git a/libvpx/vp9/encoder/vp9_dct.c b/libvpx/vp9/encoder/vp9_dct.c index 065992a..d523239 100644 --- a/libvpx/vp9/encoder/vp9_dct.c +++ b/libvpx/vp9/encoder/vp9_dct.c @@ -18,7 +18,11 @@ #include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_systemdependent.h" -#include "vp9/encoder/vp9_dct.h" +static INLINE int fdct_round_shift(int input) { + int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); + assert(INT16_MIN <= rv && rv <= INT16_MAX); + return rv; +} static void fdct4(const int16_t *input, int16_t *output) { int16_t step[4]; @@ -31,19 +35,19 @@ static void fdct4(const int16_t *input, int16_t *output) { temp1 = (step[0] + step[1]) * cospi_16_64; temp2 = (step[0] - step[1]) * cospi_16_64; - output[0] = dct_const_round_shift(temp1); - output[2] = dct_const_round_shift(temp2); + output[0] = fdct_round_shift(temp1); + output[2] = fdct_round_shift(temp2); temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; - output[1] = dct_const_round_shift(temp1); - output[3] = dct_const_round_shift(temp2); + output[1] = fdct_round_shift(temp1); + output[3] = fdct_round_shift(temp2); } void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) { // The 2D transform is done with two passes which are actually pretty // similar. In the first one, we transform the columns and transpose // the results. In the second one, we transform the rows. To achieve that, - // as the first pass results are transposed, we tranpose the columns (that + // as the first pass results are transposed, we transpose the columns (that // is the transposed rows) and transpose the results (so that it goes back // in normal/row positions). int pass; @@ -80,12 +84,12 @@ void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) { step[3] = input[0] - input[3]; temp1 = (step[0] + step[1]) * cospi_16_64; temp2 = (step[0] - step[1]) * cospi_16_64; - out[0] = dct_const_round_shift(temp1); - out[2] = dct_const_round_shift(temp2); + out[0] = fdct_round_shift(temp1); + out[2] = fdct_round_shift(temp2); temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; - out[1] = dct_const_round_shift(temp1); - out[3] = dct_const_round_shift(temp2); + out[1] = fdct_round_shift(temp1); + out[3] = fdct_round_shift(temp2); // Do next column (which is a transposed row in second/horizontal pass) in++; out += 4; @@ -138,10 +142,10 @@ static void fadst4(const int16_t *input, int16_t *output) { s3 = x2 - x0 + x3; // 1-D transform scaling factor is sqrt(2). - output[0] = dct_const_round_shift(s0); - output[1] = dct_const_round_shift(s1); - output[2] = dct_const_round_shift(s2); - output[3] = dct_const_round_shift(s3); + output[0] = fdct_round_shift(s0); + output[1] = fdct_round_shift(s1); + output[2] = fdct_round_shift(s2); + output[3] = fdct_round_shift(s3); } static const transform_2d FHT_4[] = { @@ -151,32 +155,36 @@ static const transform_2d FHT_4[] = { { fadst4, fadst4 } // ADST_ADST = 3 }; -void vp9_short_fht4x4_c(const int16_t *input, int16_t *output, - int stride, int tx_type) { - int16_t out[4 * 4]; - int16_t *outptr = &out[0]; - int i, j; - int16_t temp_in[4], temp_out[4]; - const transform_2d ht = FHT_4[tx_type]; +void vp9_fht4x4_c(const int16_t *input, int16_t *output, + int stride, int tx_type) { + if (tx_type == DCT_DCT) { + vp9_fdct4x4_c(input, output, stride); + } else { + int16_t out[4 * 4]; + int16_t *outptr = &out[0]; + int i, j; + int16_t temp_in[4], temp_out[4]; + const transform_2d ht = FHT_4[tx_type]; - // Columns - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = input[j * stride + i] * 16; - if (i == 0 && temp_in[0]) - temp_in[0] += 1; - ht.cols(temp_in, temp_out); - for (j = 0; j < 4; ++j) - outptr[j * 4 + i] = temp_out[j]; - } + // Columns + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) + temp_in[j] = input[j * stride + i] * 16; + if (i == 0 && temp_in[0]) + temp_in[0] += 1; + ht.cols(temp_in, temp_out); + for (j = 0; j < 4; ++j) + outptr[j * 4 + i] = temp_out[j]; + } - // Rows - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j + i * 4]; - ht.rows(temp_in, temp_out); - for (j = 0; j < 4; ++j) - output[j + i * 4] = (temp_out[j] + 1) >> 2; + // Rows + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) + temp_in[j] = out[j + i * 4]; + ht.rows(temp_in, temp_out); + for (j = 0; j < 4; ++j) + output[j + i * 4] = (temp_out[j] + 1) >> 2; + } } } @@ -204,16 +212,16 @@ static void fdct8(const int16_t *input, int16_t *output) { t1 = (x0 - x1) * cospi_16_64; t2 = x2 * cospi_24_64 + x3 * cospi_8_64; t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; - output[0] = dct_const_round_shift(t0); - output[2] = dct_const_round_shift(t2); - output[4] = dct_const_round_shift(t1); - output[6] = dct_const_round_shift(t3); + output[0] = fdct_round_shift(t0); + output[2] = fdct_round_shift(t2); + output[4] = fdct_round_shift(t1); + output[6] = fdct_round_shift(t3); // Stage 2 t0 = (s6 - s5) * cospi_16_64; t1 = (s6 + s5) * cospi_16_64; - t2 = dct_const_round_shift(t0); - t3 = dct_const_round_shift(t1); + t2 = fdct_round_shift(t0); + t3 = fdct_round_shift(t1); // Stage 3 x0 = s4 + t2; @@ -226,10 +234,10 @@ static void fdct8(const int16_t *input, int16_t *output) { t1 = x1 * cospi_12_64 + x2 * cospi_20_64; t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; - output[1] = dct_const_round_shift(t0); - output[3] = dct_const_round_shift(t2); - output[5] = dct_const_round_shift(t1); - output[7] = dct_const_round_shift(t3); + output[1] = fdct_round_shift(t0); + output[3] = fdct_round_shift(t2); + output[5] = fdct_round_shift(t1); + output[7] = fdct_round_shift(t3); } void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) { @@ -264,16 +272,16 @@ void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) { t1 = (x0 - x1) * cospi_16_64; t2 = x2 * cospi_24_64 + x3 * cospi_8_64; t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; - output[0 * 8] = dct_const_round_shift(t0); - output[2 * 8] = dct_const_round_shift(t2); - output[4 * 8] = dct_const_round_shift(t1); - output[6 * 8] = dct_const_round_shift(t3); + output[0 * 8] = fdct_round_shift(t0); + output[2 * 8] = fdct_round_shift(t2); + output[4 * 8] = fdct_round_shift(t1); + output[6 * 8] = fdct_round_shift(t3); // Stage 2 t0 = (s6 - s5) * cospi_16_64; t1 = (s6 + s5) * cospi_16_64; - t2 = dct_const_round_shift(t0); - t3 = dct_const_round_shift(t1); + t2 = fdct_round_shift(t0); + t3 = fdct_round_shift(t1); // Stage 3 x0 = s4 + t2; @@ -286,10 +294,10 @@ void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) { t1 = x1 * cospi_12_64 + x2 * cospi_20_64; t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; - output[1 * 8] = dct_const_round_shift(t0); - output[3 * 8] = dct_const_round_shift(t2); - output[5 * 8] = dct_const_round_shift(t1); - output[7 * 8] = dct_const_round_shift(t3); + output[1 * 8] = fdct_round_shift(t0); + output[3 * 8] = fdct_round_shift(t2); + output[5 * 8] = fdct_round_shift(t1); + output[7 * 8] = fdct_round_shift(t3); input++; output++; } @@ -307,7 +315,7 @@ void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) { // The 2D transform is done with two passes which are actually pretty // similar. In the first one, we transform the columns and transpose // the results. In the second one, we transform the rows. To achieve that, - // as the first pass results are transposed, we tranpose the columns (that + // as the first pass results are transposed, we transpose the columns (that // is the transposed rows) and transpose the results (so that it goes back // in normal/row positions). int pass; @@ -388,16 +396,16 @@ void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) { t1 = (x0 - x1) * cospi_16_64; t2 = x3 * cospi_8_64 + x2 * cospi_24_64; t3 = x3 * cospi_24_64 - x2 * cospi_8_64; - out[0] = dct_const_round_shift(t0); - out[4] = dct_const_round_shift(t2); - out[8] = dct_const_round_shift(t1); - out[12] = dct_const_round_shift(t3); + out[0] = fdct_round_shift(t0); + out[4] = fdct_round_shift(t2); + out[8] = fdct_round_shift(t1); + out[12] = fdct_round_shift(t3); // Stage 2 t0 = (s6 - s5) * cospi_16_64; t1 = (s6 + s5) * cospi_16_64; - t2 = dct_const_round_shift(t0); - t3 = dct_const_round_shift(t1); + t2 = fdct_round_shift(t0); + t3 = fdct_round_shift(t1); // Stage 3 x0 = s4 + t2; @@ -410,22 +418,22 @@ void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) { t1 = x1 * cospi_12_64 + x2 * cospi_20_64; t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; - out[2] = dct_const_round_shift(t0); - out[6] = dct_const_round_shift(t2); - out[10] = dct_const_round_shift(t1); - out[14] = dct_const_round_shift(t3); + out[2] = fdct_round_shift(t0); + out[6] = fdct_round_shift(t2); + out[10] = fdct_round_shift(t1); + out[14] = fdct_round_shift(t3); } // Work on the next eight values; step1 -> odd_results { // step 2 temp1 = (step1[5] - step1[2]) * cospi_16_64; temp2 = (step1[4] - step1[3]) * cospi_16_64; - step2[2] = dct_const_round_shift(temp1); - step2[3] = dct_const_round_shift(temp2); + step2[2] = fdct_round_shift(temp1); + step2[3] = fdct_round_shift(temp2); temp1 = (step1[4] + step1[3]) * cospi_16_64; temp2 = (step1[5] + step1[2]) * cospi_16_64; - step2[4] = dct_const_round_shift(temp1); - step2[5] = dct_const_round_shift(temp2); + step2[4] = fdct_round_shift(temp1); + step2[5] = fdct_round_shift(temp2); // step 3 step3[0] = step1[0] + step2[3]; step3[1] = step1[1] + step2[2]; @@ -438,12 +446,12 @@ void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) { // step 4 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64; - step2[1] = dct_const_round_shift(temp1); - step2[2] = dct_const_round_shift(temp2); + step2[1] = fdct_round_shift(temp1); + step2[2] = fdct_round_shift(temp2); temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64; temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; - step2[5] = dct_const_round_shift(temp1); - step2[6] = dct_const_round_shift(temp2); + step2[5] = fdct_round_shift(temp1); + step2[6] = fdct_round_shift(temp2); // step 5 step1[0] = step3[0] + step2[1]; step1[1] = step3[0] - step2[1]; @@ -456,20 +464,20 @@ void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) { // step 6 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; - out[1] = dct_const_round_shift(temp1); - out[9] = dct_const_round_shift(temp2); + out[1] = fdct_round_shift(temp1); + out[9] = fdct_round_shift(temp2); temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; - out[5] = dct_const_round_shift(temp1); - out[13] = dct_const_round_shift(temp2); + out[5] = fdct_round_shift(temp1); + out[13] = fdct_round_shift(temp2); temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; - out[3] = dct_const_round_shift(temp1); - out[11] = dct_const_round_shift(temp2); + out[3] = fdct_round_shift(temp1); + out[11] = fdct_round_shift(temp2); temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; - out[7] = dct_const_round_shift(temp1); - out[15] = dct_const_round_shift(temp2); + out[7] = fdct_round_shift(temp1); + out[15] = fdct_round_shift(temp2); } // Do next column (which is a transposed row in second/horizontal pass) in++; @@ -503,14 +511,14 @@ static void fadst8(const int16_t *input, int16_t *output) { s6 = cospi_26_64 * x6 + cospi_6_64 * x7; s7 = cospi_6_64 * x6 - cospi_26_64 * x7; - x0 = dct_const_round_shift(s0 + s4); - x1 = dct_const_round_shift(s1 + s5); - x2 = dct_const_round_shift(s2 + s6); - x3 = dct_const_round_shift(s3 + s7); - x4 = dct_const_round_shift(s0 - s4); - x5 = dct_const_round_shift(s1 - s5); - x6 = dct_const_round_shift(s2 - s6); - x7 = dct_const_round_shift(s3 - s7); + x0 = fdct_round_shift(s0 + s4); + x1 = fdct_round_shift(s1 + s5); + x2 = fdct_round_shift(s2 + s6); + x3 = fdct_round_shift(s3 + s7); + x4 = fdct_round_shift(s0 - s4); + x5 = fdct_round_shift(s1 - s5); + x6 = fdct_round_shift(s2 - s6); + x7 = fdct_round_shift(s3 - s7); // stage 2 s0 = x0; @@ -526,10 +534,10 @@ static void fadst8(const int16_t *input, int16_t *output) { x1 = s1 + s3; x2 = s0 - s2; x3 = s1 - s3; - x4 = dct_const_round_shift(s4 + s6); - x5 = dct_const_round_shift(s5 + s7); - x6 = dct_const_round_shift(s4 - s6); - x7 = dct_const_round_shift(s5 - s7); + x4 = fdct_round_shift(s4 + s6); + x5 = fdct_round_shift(s5 + s7); + x6 = fdct_round_shift(s4 - s6); + x7 = fdct_round_shift(s5 - s7); // stage 3 s2 = cospi_16_64 * (x2 + x3); @@ -537,10 +545,10 @@ static void fadst8(const int16_t *input, int16_t *output) { s6 = cospi_16_64 * (x6 + x7); s7 = cospi_16_64 * (x6 - x7); - x2 = dct_const_round_shift(s2); - x3 = dct_const_round_shift(s3); - x6 = dct_const_round_shift(s6); - x7 = dct_const_round_shift(s7); + x2 = fdct_round_shift(s2); + x3 = fdct_round_shift(s3); + x6 = fdct_round_shift(s6); + x7 = fdct_round_shift(s7); output[0] = x0; output[1] = - x4; @@ -559,30 +567,34 @@ static const transform_2d FHT_8[] = { { fadst8, fadst8 } // ADST_ADST = 3 }; -void vp9_short_fht8x8_c(const int16_t *input, int16_t *output, - int stride, int tx_type) { - int16_t out[64]; - int16_t *outptr = &out[0]; - int i, j; - int16_t temp_in[8], temp_out[8]; - const transform_2d ht = FHT_8[tx_type]; - - // Columns - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = input[j * stride + i] * 4; - ht.cols(temp_in, temp_out); - for (j = 0; j < 8; ++j) - outptr[j * 8 + i] = temp_out[j]; - } +void vp9_fht8x8_c(const int16_t *input, int16_t *output, + int stride, int tx_type) { + if (tx_type == DCT_DCT) { + vp9_fdct8x8_c(input, output, stride); + } else { + int16_t out[64]; + int16_t *outptr = &out[0]; + int i, j; + int16_t temp_in[8], temp_out[8]; + const transform_2d ht = FHT_8[tx_type]; + + // Columns + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) + temp_in[j] = input[j * stride + i] * 4; + ht.cols(temp_in, temp_out); + for (j = 0; j < 8; ++j) + outptr[j * 8 + i] = temp_out[j]; + } - // Rows - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j + i * 8]; - ht.rows(temp_in, temp_out); - for (j = 0; j < 8; ++j) - output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1; + // Rows + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) + temp_in[j] = out[j + i * 8]; + ht.rows(temp_in, temp_out); + for (j = 0; j < 8; ++j) + output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1; + } } } @@ -693,16 +705,16 @@ static void fdct16(const int16_t in[16], int16_t out[16]) { t1 = (x0 - x1) * cospi_16_64; t2 = x3 * cospi_8_64 + x2 * cospi_24_64; t3 = x3 * cospi_24_64 - x2 * cospi_8_64; - out[0] = dct_const_round_shift(t0); - out[4] = dct_const_round_shift(t2); - out[8] = dct_const_round_shift(t1); - out[12] = dct_const_round_shift(t3); + out[0] = fdct_round_shift(t0); + out[4] = fdct_round_shift(t2); + out[8] = fdct_round_shift(t1); + out[12] = fdct_round_shift(t3); // Stage 2 t0 = (s6 - s5) * cospi_16_64; t1 = (s6 + s5) * cospi_16_64; - t2 = dct_const_round_shift(t0); - t3 = dct_const_round_shift(t1); + t2 = fdct_round_shift(t0); + t3 = fdct_round_shift(t1); // Stage 3 x0 = s4 + t2; @@ -715,21 +727,21 @@ static void fdct16(const int16_t in[16], int16_t out[16]) { t1 = x1 * cospi_12_64 + x2 * cospi_20_64; t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; - out[2] = dct_const_round_shift(t0); - out[6] = dct_const_round_shift(t2); - out[10] = dct_const_round_shift(t1); - out[14] = dct_const_round_shift(t3); + out[2] = fdct_round_shift(t0); + out[6] = fdct_round_shift(t2); + out[10] = fdct_round_shift(t1); + out[14] = fdct_round_shift(t3); } // step 2 temp1 = (step1[5] - step1[2]) * cospi_16_64; temp2 = (step1[4] - step1[3]) * cospi_16_64; - step2[2] = dct_const_round_shift(temp1); - step2[3] = dct_const_round_shift(temp2); + step2[2] = fdct_round_shift(temp1); + step2[3] = fdct_round_shift(temp2); temp1 = (step1[4] + step1[3]) * cospi_16_64; temp2 = (step1[5] + step1[2]) * cospi_16_64; - step2[4] = dct_const_round_shift(temp1); - step2[5] = dct_const_round_shift(temp2); + step2[4] = fdct_round_shift(temp1); + step2[5] = fdct_round_shift(temp2); // step 3 step3[0] = step1[0] + step2[3]; @@ -744,12 +756,12 @@ static void fdct16(const int16_t in[16], int16_t out[16]) { // step 4 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64; - step2[1] = dct_const_round_shift(temp1); - step2[2] = dct_const_round_shift(temp2); + step2[1] = fdct_round_shift(temp1); + step2[2] = fdct_round_shift(temp2); temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64; temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; - step2[5] = dct_const_round_shift(temp1); - step2[6] = dct_const_round_shift(temp2); + step2[5] = fdct_round_shift(temp1); + step2[6] = fdct_round_shift(temp2); // step 5 step1[0] = step3[0] + step2[1]; @@ -764,23 +776,23 @@ static void fdct16(const int16_t in[16], int16_t out[16]) { // step 6 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; - out[1] = dct_const_round_shift(temp1); - out[9] = dct_const_round_shift(temp2); + out[1] = fdct_round_shift(temp1); + out[9] = fdct_round_shift(temp2); temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; - out[5] = dct_const_round_shift(temp1); - out[13] = dct_const_round_shift(temp2); + out[5] = fdct_round_shift(temp1); + out[13] = fdct_round_shift(temp2); temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; - out[3] = dct_const_round_shift(temp1); - out[11] = dct_const_round_shift(temp2); + out[3] = fdct_round_shift(temp1); + out[11] = fdct_round_shift(temp2); temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; - out[7] = dct_const_round_shift(temp1); - out[15] = dct_const_round_shift(temp2); + out[7] = fdct_round_shift(temp1); + out[15] = fdct_round_shift(temp2); } static void fadst16(const int16_t *input, int16_t *output) { @@ -821,22 +833,22 @@ static void fadst16(const int16_t *input, int16_t *output) { s14 = x14 * cospi_29_64 + x15 * cospi_3_64; s15 = x14 * cospi_3_64 - x15 * cospi_29_64; - x0 = dct_const_round_shift(s0 + s8); - x1 = dct_const_round_shift(s1 + s9); - x2 = dct_const_round_shift(s2 + s10); - x3 = dct_const_round_shift(s3 + s11); - x4 = dct_const_round_shift(s4 + s12); - x5 = dct_const_round_shift(s5 + s13); - x6 = dct_const_round_shift(s6 + s14); - x7 = dct_const_round_shift(s7 + s15); - x8 = dct_const_round_shift(s0 - s8); - x9 = dct_const_round_shift(s1 - s9); - x10 = dct_const_round_shift(s2 - s10); - x11 = dct_const_round_shift(s3 - s11); - x12 = dct_const_round_shift(s4 - s12); - x13 = dct_const_round_shift(s5 - s13); - x14 = dct_const_round_shift(s6 - s14); - x15 = dct_const_round_shift(s7 - s15); + x0 = fdct_round_shift(s0 + s8); + x1 = fdct_round_shift(s1 + s9); + x2 = fdct_round_shift(s2 + s10); + x3 = fdct_round_shift(s3 + s11); + x4 = fdct_round_shift(s4 + s12); + x5 = fdct_round_shift(s5 + s13); + x6 = fdct_round_shift(s6 + s14); + x7 = fdct_round_shift(s7 + s15); + x8 = fdct_round_shift(s0 - s8); + x9 = fdct_round_shift(s1 - s9); + x10 = fdct_round_shift(s2 - s10); + x11 = fdct_round_shift(s3 - s11); + x12 = fdct_round_shift(s4 - s12); + x13 = fdct_round_shift(s5 - s13); + x14 = fdct_round_shift(s6 - s14); + x15 = fdct_round_shift(s7 - s15); // stage 2 s0 = x0; @@ -864,14 +876,14 @@ static void fadst16(const int16_t *input, int16_t *output) { x5 = s1 - s5; x6 = s2 - s6; x7 = s3 - s7; - x8 = dct_const_round_shift(s8 + s12); - x9 = dct_const_round_shift(s9 + s13); - x10 = dct_const_round_shift(s10 + s14); - x11 = dct_const_round_shift(s11 + s15); - x12 = dct_const_round_shift(s8 - s12); - x13 = dct_const_round_shift(s9 - s13); - x14 = dct_const_round_shift(s10 - s14); - x15 = dct_const_round_shift(s11 - s15); + x8 = fdct_round_shift(s8 + s12); + x9 = fdct_round_shift(s9 + s13); + x10 = fdct_round_shift(s10 + s14); + x11 = fdct_round_shift(s11 + s15); + x12 = fdct_round_shift(s8 - s12); + x13 = fdct_round_shift(s9 - s13); + x14 = fdct_round_shift(s10 - s14); + x15 = fdct_round_shift(s11 - s15); // stage 3 s0 = x0; @@ -895,18 +907,18 @@ static void fadst16(const int16_t *input, int16_t *output) { x1 = s1 + s3; x2 = s0 - s2; x3 = s1 - s3; - x4 = dct_const_round_shift(s4 + s6); - x5 = dct_const_round_shift(s5 + s7); - x6 = dct_const_round_shift(s4 - s6); - x7 = dct_const_round_shift(s5 - s7); + x4 = fdct_round_shift(s4 + s6); + x5 = fdct_round_shift(s5 + s7); + x6 = fdct_round_shift(s4 - s6); + x7 = fdct_round_shift(s5 - s7); x8 = s8 + s10; x9 = s9 + s11; x10 = s8 - s10; x11 = s9 - s11; - x12 = dct_const_round_shift(s12 + s14); - x13 = dct_const_round_shift(s13 + s15); - x14 = dct_const_round_shift(s12 - s14); - x15 = dct_const_round_shift(s13 - s15); + x12 = fdct_round_shift(s12 + s14); + x13 = fdct_round_shift(s13 + s15); + x14 = fdct_round_shift(s12 - s14); + x15 = fdct_round_shift(s13 - s15); // stage 4 s2 = (- cospi_16_64) * (x2 + x3); @@ -918,14 +930,14 @@ static void fadst16(const int16_t *input, int16_t *output) { s14 = (- cospi_16_64) * (x14 + x15); s15 = cospi_16_64 * (x14 - x15); - x2 = dct_const_round_shift(s2); - x3 = dct_const_round_shift(s3); - x6 = dct_const_round_shift(s6); - x7 = dct_const_round_shift(s7); - x10 = dct_const_round_shift(s10); - x11 = dct_const_round_shift(s11); - x14 = dct_const_round_shift(s14); - x15 = dct_const_round_shift(s15); + x2 = fdct_round_shift(s2); + x3 = fdct_round_shift(s3); + x6 = fdct_round_shift(s6); + x7 = fdct_round_shift(s7); + x10 = fdct_round_shift(s10); + x11 = fdct_round_shift(s11); + x14 = fdct_round_shift(s14); + x15 = fdct_round_shift(s15); output[0] = x0; output[1] = - x8; @@ -952,31 +964,34 @@ static const transform_2d FHT_16[] = { { fadst16, fadst16 } // ADST_ADST = 3 }; -void vp9_short_fht16x16_c(const int16_t *input, int16_t *output, - int stride, int tx_type) { - int16_t out[256]; - int16_t *outptr = &out[0]; - int i, j; - int16_t temp_in[16], temp_out[16]; - const transform_2d ht = FHT_16[tx_type]; - - // Columns - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = input[j * stride + i] * 4; - ht.cols(temp_in, temp_out); - for (j = 0; j < 16; ++j) - outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; -// outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; - } +void vp9_fht16x16_c(const int16_t *input, int16_t *output, + int stride, int tx_type) { + if (tx_type == DCT_DCT) { + vp9_fdct16x16_c(input, output, stride); + } else { + int16_t out[256]; + int16_t *outptr = &out[0]; + int i, j; + int16_t temp_in[16], temp_out[16]; + const transform_2d ht = FHT_16[tx_type]; + + // Columns + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = input[j * stride + i] * 4; + ht.cols(temp_in, temp_out); + for (j = 0; j < 16; ++j) + outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; + } - // Rows - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j + i * 16]; - ht.rows(temp_in, temp_out); - for (j = 0; j < 16; ++j) - output[j + i * 16] = temp_out[j]; + // Rows + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = out[j + i * 16]; + ht.rows(temp_in, temp_out); + for (j = 0; j < 16; ++j) + output[j + i * 16] = temp_out[j]; + } } } @@ -991,7 +1006,7 @@ static INLINE int half_round_shift(int input) { return rv; } -static void dct32_1d(const int *input, int *output, int round) { +static void fdct32(const int *input, int *output, int round) { int step[32]; // Stage 1 step[0] = input[0] + input[(32 - 1)]; @@ -1323,7 +1338,7 @@ void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) { int temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4; - dct32_1d(temp_in, temp_out, 0); + fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; } @@ -1333,13 +1348,13 @@ void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) { int temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32]; - dct32_1d(temp_in, temp_out, 0); + fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; } } -// Note that although we use dct_32_round in dct32_1d computation flow, +// Note that although we use dct_32_round in dct32 computation flow, // this 2d fdct32x32 for rate-distortion optimization loop is operating // within 16 bits precision. void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) { @@ -1351,7 +1366,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) { int temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4; - dct32_1d(temp_in, temp_out, 0); + fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) // TODO(cd): see quality impact of only doing // output[j * 32 + i] = (temp_out[j] + 1) >> 2; @@ -1364,32 +1379,8 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) { int temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32]; - dct32_1d(temp_in, temp_out, 1); + fdct32(temp_in, temp_out, 1); for (j = 0; j < 32; ++j) out[j + i * 32] = temp_out[j]; } } - -void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output, - int stride) { - if (tx_type == DCT_DCT) - vp9_fdct4x4(input, output, stride); - else - vp9_short_fht4x4(input, output, stride, tx_type); -} - -void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output, - int stride) { - if (tx_type == DCT_DCT) - vp9_fdct8x8(input, output, stride); - else - vp9_short_fht8x8(input, output, stride, tx_type); -} - -void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output, - int stride) { - if (tx_type == DCT_DCT) - vp9_fdct16x16(input, output, stride); - else - vp9_short_fht16x16(input, output, stride, tx_type); -} |