summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jyri Sarha <jsarha@ti.com> 2011-08-03 11:20:21 -0700
committer Eric Laurent <elaurent@google.com> 2011-08-03 14:13:50 -0700
commit 35318dd943257760780f28b95b6ca99a79886c3d (patch)
tree bbf09f34c55fabb8ed02f3b76acf8ecdb15c9b9a
parent 85c3efbb33382cfa20e0110b3b3e318686e91c4e (diff)
download android_external_speex-35318dd943257760780f28b95b6ca99a79886c3d.tar.gz
android_external_speex-35318dd943257760780f28b95b6ca99a79886c3d.tar.bz2
android_external_speex-35318dd943257760780f28b95b6ca99a79886c3d.zip
Issue 5048376 Native crash while recording a video
Fixed ARM neon resampler optimization. Change-Id: I0a1e8ec01311b455495ae9d0663e87a35e2ab977
-rw-r--r-- Android.mk 2
-rw-r--r-- libspeex/resample_neon.h 103
2 files changed, 54 insertions, 51 deletions
diff --git a/Android.mk b/Android.mk
index 72c8abe..d33e184 100644
--- a/Android.mk
+++ b/Android.mk
@@ -52,7 +52,7 @@ LOCAL_CFLAGS += -DEXPORT= -DFIXED_POINT -DRESAMPLE_FORCE_FULL_SINC_TABLE
LOCAL_CFLAGS += -O3 -fstrict-aliasing -fprefetch-loop-arrays
ifeq ($(ARCH_ARM_HAVE_NEON),true)
-# LOCAL_CFLAGS += -D_USE_NEON
+LOCAL_CFLAGS += -D_USE_NEON
endif
LOCAL_C_INCLUDES += \
diff --git a/libspeex/resample_neon.h b/libspeex/resample_neon.h
index 65741c1..784ee2f 100644
--- a/libspeex/resample_neon.h
+++ b/libspeex/resample_neon.h
@@ -37,61 +37,64 @@
#include <arm_neon.h>
-/* NOTE: This code only works with following filter lengths:
- 8 or n*16 (where n = 1,2,3...)
- The current resampler quality presets follow the above rule.
-*/
-
#ifdef FIXED_POINT
+
+
#define OVERRIDE_INNER_PRODUCT_SINGLE
static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
{
int32_t ret;
- if (len > 8) {
- asm volatile (" vld1.16 {d16, d17, d18, d19}, [%[a]]!\n"
- " vld1.16 {d20, d21, d22, d23}, [%[b]]!\n"
- " subs %[len], %[len], #16\n"
- " vmull.s16 q0, d16, d20\n"
- " vmlal.s16 q0, d17, d21\n"
- " vmlal.s16 q0, d18, d22\n"
- " vmlal.s16 q0, d19, d23\n"
- " beq 2f\n"
- "1:"
- " vld1.16 {d16, d17, d18, d19}, [%[a]]!\n"
- " vld1.16 {d20, d21, d22, d23}, [%[b]]!\n"
- " subs %[len], %[len], #16\n"
- " vmlal.s16 q0, d16, d20\n"
- " vmlal.s16 q0, d17, d21\n"
- " vmlal.s16 q0, d18, d22\n"
- " vmlal.s16 q0, d19, d23\n"
- " bne 1b\n"
- "2:"
- " vaddl.s32 q0, d0, d1\n"
- " vadd.s64 d0, d0, d1\n"
- " vqmovn.s64 d0, q0\n"
- " vqrshrn.s32 d0, q0, #15\n"
- " vmov.s16 %[ret],d0[0]\n"
- : [ret] "=&r" (ret), [a] "+r" (a), [b] "+r" (b),
- [len] "+r" (len)
- :
- : "cc", "q0",
- "d16", "d17", "d18", "d19",
- "d20", "d21", "d22", "d23");
- }
- else {
- asm volatile ("vld1.16 {d4, d5}, [%[a]]\n"
- "vld1.16 {d6, d7}, [%[b]]\n"
- "vmull.s16 q0, d4, d6\n"
- "vmlal.s16 q0, d5, d7\n"
- "vaddl.s32 q0, d0, d1\n"
- "vadd.s64 d0, d0, d1\n"
- "vqmovn.s64 d0, q0\n"
- "vqrshrn.s32 d0, q0, #15\n"
- "vmov.s16 %[ret],d0[0]\n"
- : [ret] "=&r" (ret)
- : [a] "r" (a), [b] "r" (b)
- : "q0", "d4", "d5", "d6", "d7");
- }
+ uint32_t remainder = len % 16;
+ len = len - remainder;
+
+ asm volatile (" cmp %[len], #0\n"
+ " bne 1f\n"
+ " vld1.16 {d16}, [%[a]]!\n"
+ " vld1.16 {d20}, [%[b]]!\n"
+ " subs %[remainder], %[remainder], #4\n"
+ " vmull.s16 q0, d16, d20\n"
+ " beq 5f\n"
+ " b 4f\n"
+ "1:"
+ " vld1.16 {d16, d17, d18, d19}, [%[a]]!\n"
+ " vld1.16 {d20, d21, d22, d23}, [%[b]]!\n"
+ " subs %[len], %[len], #16\n"
+ " vmull.s16 q0, d16, d20\n"
+ " vmlal.s16 q0, d17, d21\n"
+ " vmlal.s16 q0, d18, d22\n"
+ " vmlal.s16 q0, d19, d23\n"
+ " beq 3f\n"
+ "2:"
+ " vld1.16 {d16, d17, d18, d19}, [%[a]]!\n"
+ " vld1.16 {d20, d21, d22, d23}, [%[b]]!\n"
+ " subs %[len], %[len], #16\n"
+ " vmlal.s16 q0, d16, d20\n"
+ " vmlal.s16 q0, d17, d21\n"
+ " vmlal.s16 q0, d18, d22\n"
+ " vmlal.s16 q0, d19, d23\n"
+ " bne 2b\n"
+ "3:"
+ " cmp %[remainder], #0\n"
+ " beq 5f\n"
+ "4:"
+ " vld1.16 {d16}, [%[a]]!\n"
+ " vld1.16 {d20}, [%[b]]!\n"
+ " subs %[remainder], %[remainder], #4\n"
+ " vmlal.s16 q0, d16, d20\n"
+ " bne 4b\n"
+ "5:"
+ " vaddl.s32 q0, d0, d1\n"
+ " vadd.s64 d0, d0, d1\n"
+ " vqmovn.s64 d0, q0\n"
+ " vqrshrn.s32 d0, q0, #15\n"
+ " vmov.s16 %[ret], d0[0]\n"
+ : [ret] "=&r" (ret), [a] "+r" (a), [b] "+r" (b),
+ [len] "+r" (len), [remainder] "+r" (remainder)
+ :
+ : "cc", "q0",
+ "d16", "d17", "d18", "d19",
+ "d20", "d21", "d22", "d23");
return ret;
}
+
#endif