diff options
author | hkuang <hkuang@google.com> | 2013-11-07 15:50:31 -0800 |
---|---|---|
committer | hkuang <hkuang@google.com> | 2013-11-08 11:40:06 -0800 |
commit | 5ae7ac49f08a179e4f054d99fcfc9dce78d26e58 (patch) | |
tree | 0d891d2cbbac4c3da6fd15a25bf8797b29b31994 /libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c | |
parent | e6eeaaa14ccef4c0938fcce21c54979204041a30 (diff) | |
download | android_external_libvpx-5ae7ac49f08a179e4f054d99fcfc9dce78d26e58.tar.gz android_external_libvpx-5ae7ac49f08a179e4f054d99fcfc9dce78d26e58.tar.bz2 android_external_libvpx-5ae7ac49f08a179e4f054d99fcfc9dce78d26e58.zip |
Roll latest libvpx into Android.
The latest libvpx just added multithreaded tile decoding support.
Checkout is from master: abdefeaa89a0908327518e5ca75c935c66b2e1aa
Bug:11576718
Change-Id: Icbe5430633e179b8dc6d419e280ad7ebd3cad4a0
Diffstat (limited to 'libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c')
-rw-r--r-- | libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c | 91 |
1 files changed, 47 insertions, 44 deletions
diff --git a/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c b/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c index 3e3e400..0b9fc09 100644 --- a/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c +++ b/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c @@ -11,45 +11,47 @@ #include "./vp9_rtcd.h" #include "vp9/common/vp9_common.h" -extern void vp9_short_idct16x16_add_neon_pass1(int16_t *input, - int16_t *output, - int output_stride); -extern void vp9_short_idct16x16_add_neon_pass2(int16_t *src, - int16_t *output, - int16_t *pass1Output, - int16_t skip_adding, - uint8_t *dest, - int dest_stride); -extern void vp9_short_idct10_16x16_add_neon_pass1(int16_t *input, - int16_t *output, - int output_stride); -extern void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src, - int16_t *output, - int16_t *pass1Output, - int16_t skip_adding, - uint8_t *dest, - int dest_stride); -extern void save_neon_registers(); -extern void restore_neon_registers(); - - -void vp9_short_idct16x16_add_neon(int16_t *input, - uint8_t *dest, int dest_stride) { +void vp9_idct16x16_256_add_neon_pass1(const int16_t *input, + int16_t *output, + int output_stride); +void vp9_idct16x16_256_add_neon_pass2(const int16_t *src, + int16_t *output, + int16_t *pass1Output, + int16_t skip_adding, + uint8_t *dest, + int dest_stride); +void vp9_idct16x16_10_add_neon_pass1(const int16_t *input, + int16_t *output, + int output_stride); +void vp9_idct16x16_10_add_neon_pass2(const int16_t *src, + int16_t *output, + int16_t *pass1Output, + int16_t skip_adding, + uint8_t *dest, + int dest_stride); + +/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */ +extern void vp9_push_neon(int64_t *store); +extern void vp9_pop_neon(int64_t *store); + +void vp9_idct16x16_256_add_neon(const int16_t *input, + uint8_t *dest, int dest_stride) { + int64_t store_reg[8]; int16_t pass1_output[16*16] = {0}; int16_t row_idct_output[16*16] = {0}; // save d8-d15 register values. 
- save_neon_registers(); + vp9_push_neon(store_reg); /* Parallel idct on the upper 8 rows */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the // stage 6 result in pass1_output. - vp9_short_idct16x16_add_neon_pass1(input, pass1_output, 8); + vp9_idct16x16_256_add_neon_pass1(input, pass1_output, 8); // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines // with result in pass1(pass1_output) to calculate final result in stage 7 // which will be saved into row_idct_output. - vp9_short_idct16x16_add_neon_pass2(input+1, + vp9_idct16x16_256_add_neon_pass2(input+1, row_idct_output, pass1_output, 0, @@ -59,12 +61,12 @@ void vp9_short_idct16x16_add_neon(int16_t *input, /* Parallel idct on the lower 8 rows */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the // stage 6 result in pass1_output. - vp9_short_idct16x16_add_neon_pass1(input+8*16, pass1_output, 8); + vp9_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8); // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines // with result in pass1(pass1_output) to calculate final result in stage 7 // which will be saved into row_idct_output. - vp9_short_idct16x16_add_neon_pass2(input+8*16+1, + vp9_idct16x16_256_add_neon_pass2(input+8*16+1, row_idct_output+8, pass1_output, 0, @@ -74,12 +76,12 @@ void vp9_short_idct16x16_add_neon(int16_t *input, /* Parallel idct on the left 8 columns */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the // stage 6 result in pass1_output. - vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8); + vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8); // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines // with result in pass1(pass1_output) to calculate final result in stage 7. // Then add the result to the destination data. 
- vp9_short_idct16x16_add_neon_pass2(row_idct_output+1, + vp9_idct16x16_256_add_neon_pass2(row_idct_output+1, row_idct_output, pass1_output, 1, @@ -89,12 +91,12 @@ void vp9_short_idct16x16_add_neon(int16_t *input, /* Parallel idct on the right 8 columns */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the // stage 6 result in pass1_output. - vp9_short_idct16x16_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); + vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines // with result in pass1(pass1_output) to calculate final result in stage 7. // Then add the result to the destination data. - vp9_short_idct16x16_add_neon_pass2(row_idct_output+8*16+1, + vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1, row_idct_output+8, pass1_output, 1, @@ -102,28 +104,29 @@ void vp9_short_idct16x16_add_neon(int16_t *input, dest_stride); // restore d8-d15 register values. - restore_neon_registers(); + vp9_pop_neon(store_reg); return; } -void vp9_short_idct10_16x16_add_neon(int16_t *input, - uint8_t *dest, int dest_stride) { +void vp9_idct16x16_10_add_neon(const int16_t *input, + uint8_t *dest, int dest_stride) { + int64_t store_reg[8]; int16_t pass1_output[16*16] = {0}; int16_t row_idct_output[16*16] = {0}; // save d8-d15 register values. - save_neon_registers(); + vp9_push_neon(store_reg); /* Parallel idct on the upper 8 rows */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the // stage 6 result in pass1_output. - vp9_short_idct10_16x16_add_neon_pass1(input, pass1_output, 8); + vp9_idct16x16_10_add_neon_pass1(input, pass1_output, 8); // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines // with result in pass1(pass1_output) to calculate final result in stage 7 // which will be saved into row_idct_output. 
- vp9_short_idct10_16x16_add_neon_pass2(input+1, + vp9_idct16x16_10_add_neon_pass2(input+1, row_idct_output, pass1_output, 0, @@ -135,12 +138,12 @@ void vp9_short_idct10_16x16_add_neon(int16_t *input, /* Parallel idct on the left 8 columns */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the // stage 6 result in pass1_output. - vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8); + vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8); // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines // with result in pass1(pass1_output) to calculate final result in stage 7. // Then add the result to the destination data. - vp9_short_idct16x16_add_neon_pass2(row_idct_output+1, + vp9_idct16x16_256_add_neon_pass2(row_idct_output+1, row_idct_output, pass1_output, 1, @@ -150,12 +153,12 @@ void vp9_short_idct10_16x16_add_neon(int16_t *input, /* Parallel idct on the right 8 columns */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the // stage 6 result in pass1_output. - vp9_short_idct16x16_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); + vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines // with result in pass1(pass1_output) to calculate final result in stage 7. // Then add the result to the destination data. - vp9_short_idct16x16_add_neon_pass2(row_idct_output+8*16+1, + vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1, row_idct_output+8, pass1_output, 1, @@ -163,7 +166,7 @@ void vp9_short_idct10_16x16_add_neon(int16_t *input, dest_stride); // restore d8-d15 register values. - restore_neon_registers(); + vp9_pop_neon(store_reg); return; } |