aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--libavcodec/aac.h1
-rw-r--r--libavcodec/aacdec.c3
-rw-r--r--libavcodec/acelp_pitch_delay.c4
-rw-r--r--libavcodec/acelp_vectors.c6
-rw-r--r--libavcodec/amrnbdec.c10
-rw-r--r--libavcodec/amrwbdec.c23
-rw-r--r--libavcodec/arm/dsputil_init_neon.c3
-rw-r--r--libavcodec/arm/dsputil_neon.S13
-rw-r--r--libavcodec/arm/h264pred_init_arm.c1
-rw-r--r--libavcodec/arm/vp3dsp_init_arm.c1
-rw-r--r--libavcodec/arm/vp8dsp_init_arm.c1
-rw-r--r--libavcodec/arm/vp8dsp_init_armv6.c2
-rw-r--r--libavcodec/arm/vp8dsp_init_neon.c2
-rw-r--r--libavcodec/dsputil.c12
-rw-r--r--libavcodec/dsputil.h18
-rw-r--r--libavcodec/qcelpdec.c17
-rw-r--r--libavcodec/ra288.c4
-rw-r--r--libavcodec/sipr.c15
-rw-r--r--libavcodec/sipr16k.c8
-rw-r--r--libavcodec/wmavoice.c16
-rw-r--r--libavcodec/x86/dsputil.asm26
-rw-r--r--libavcodec/x86/dsputil_mmx.c4
-rw-r--r--libavutil/arm/float_dsp_init_neon.c3
-rw-r--r--libavutil/arm/float_dsp_neon.S13
-rw-r--r--libavutil/float_dsp.c12
-rw-r--r--libavutil/float_dsp.h22
-rw-r--r--libavutil/x86/float_dsp.asm28
-rw-r--r--libavutil/x86/float_dsp_init.c3
28 files changed, 141 insertions, 130 deletions
diff --git a/libavcodec/aac.h b/libavcodec/aac.h
index 9e9fa0ea64..5b98856e67 100644
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -292,7 +292,6 @@ typedef struct AACContext {
FFTContext mdct;
FFTContext mdct_small;
FFTContext mdct_ltp;
- DSPContext dsp;
FmtConvertContext fmt_conv;
AVFloatDSPContext fdsp;
int random_state;
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index fba7de501d..235ea4670c 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -928,7 +928,6 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
ff_aac_sbr_init();
- ff_dsputil_init(&ac->dsp, avctx);
ff_fmt_convert_init(&ac->fmt_conv, avctx);
avpriv_float_dsp_init(&ac->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
@@ -1394,7 +1393,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
cfo[k] = ac->random_state;
}
- band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
+ band_energy = ac->fdsp.scalarproduct_float(cfo, cfo, off_len);
scale = sf[idx] / sqrtf(band_energy);
ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
}
diff --git a/libavcodec/acelp_pitch_delay.c b/libavcodec/acelp_pitch_delay.c
index 95bcce7a4a..c005c4b4e8 100644
--- a/libavcodec/acelp_pitch_delay.c
+++ b/libavcodec/acelp_pitch_delay.c
@@ -21,10 +21,10 @@
*/
#include "libavutil/common.h"
+#include "libavutil/float_dsp.h"
#include "libavutil/libm.h"
#include "libavutil/mathematics.h"
#include "avcodec.h"
-#include "dsputil.h"
#include "acelp_pitch_delay.h"
#include "celp_math.h"
@@ -132,7 +132,7 @@ float ff_amr_set_fixed_gain(float fixed_gain_factor, float fixed_mean_energy,
// Note 10^(0.05 * -10log(average x2)) = 1/sqrt((average x2)).
float val = fixed_gain_factor *
exp2f(M_LOG2_10 * 0.05 *
- (ff_scalarproduct_float_c(pred_table, prediction_error, 4) +
+ (avpriv_scalarproduct_float_c(pred_table, prediction_error, 4) +
energy_mean)) /
sqrtf(fixed_mean_energy);
diff --git a/libavcodec/acelp_vectors.c b/libavcodec/acelp_vectors.c
index aadacb4283..c9d6f877f6 100644
--- a/libavcodec/acelp_vectors.c
+++ b/libavcodec/acelp_vectors.c
@@ -23,8 +23,8 @@
#include <inttypes.h>
#include "libavutil/common.h"
+#include "libavutil/float_dsp.h"
#include "avcodec.h"
-#include "dsputil.h"
#include "acelp_vectors.h"
const uint8_t ff_fc_2pulses_9bits_track1[16] =
@@ -203,7 +203,7 @@ void ff_adaptive_gain_control(float *out, const float *in, float speech_energ,
int size, float alpha, float *gain_mem)
{
int i;
- float postfilter_energ = ff_scalarproduct_float_c(in, in, size);
+ float postfilter_energ = avpriv_scalarproduct_float_c(in, in, size);
float gain_scale_factor = 1.0;
float mem = *gain_mem;
@@ -224,7 +224,7 @@ void ff_scale_vector_to_given_sum_of_squares(float *out, const float *in,
float sum_of_squares, const int n)
{
int i;
- float scalefactor = ff_scalarproduct_float_c(in, in, n);
+ float scalefactor = avpriv_scalarproduct_float_c(in, in, n);
if (scalefactor)
scalefactor = sqrt(sum_of_squares / scalefactor);
for (i = 0; i < n; i++)
diff --git a/libavcodec/amrnbdec.c b/libavcodec/amrnbdec.c
index 40bc2530e6..7399c9ffa4 100644
--- a/libavcodec/amrnbdec.c
+++ b/libavcodec/amrnbdec.c
@@ -44,8 +44,8 @@
#include <math.h>
#include "libavutil/channel_layout.h"
+#include "libavutil/float_dsp.h"
#include "avcodec.h"
-#include "dsputil.h"
#include "libavutil/common.h"
#include "libavutil/avassert.h"
#include "celp_math.h"
@@ -810,7 +810,7 @@ static int synthesis(AMRContext *p, float *lpc,
// emphasize pitch vector contribution
if (p->pitch_gain[4] > 0.5 && !overflow) {
float energy = p->celpm_ctx.dot_productf(excitation, excitation,
- AMR_SUBFRAME_SIZE);
+ AMR_SUBFRAME_SIZE);
float pitch_factor =
p->pitch_gain[4] *
(p->cur_frame_mode == MODE_12k2 ?
@@ -911,7 +911,7 @@ static void postfilter(AMRContext *p, float *lpc, float *buf_out)
float *samples = p->samples_in + LP_FILTER_ORDER; // Start of input
float speech_gain = p->celpm_ctx.dot_productf(samples, samples,
- AMR_SUBFRAME_SIZE);
+ AMR_SUBFRAME_SIZE);
float pole_out[AMR_SUBFRAME_SIZE + LP_FILTER_ORDER]; // Output of pole filter
const float *gamma_n, *gamma_d; // Formant filter factor table
@@ -1018,8 +1018,8 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
p->fixed_gain[4] =
ff_amr_set_fixed_gain(fixed_gain_factor,
p->celpm_ctx.dot_productf(p->fixed_vector,
- p->fixed_vector,
- AMR_SUBFRAME_SIZE) /
+ p->fixed_vector,
+ AMR_SUBFRAME_SIZE) /
AMR_SUBFRAME_SIZE,
p->prediction_error,
energy_mean[p->cur_frame_mode], energy_pred_fac);
diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c
index fa910466d4..f06d40b960 100644
--- a/libavcodec/amrwbdec.c
+++ b/libavcodec/amrwbdec.c
@@ -26,10 +26,10 @@
#include "libavutil/channel_layout.h"
#include "libavutil/common.h"
+#include "libavutil/float_dsp.h"
#include "libavutil/lfg.h"
#include "avcodec.h"
-#include "dsputil.h"
#include "lsp.h"
#include "celp_filters.h"
#include "celp_math.h"
@@ -612,11 +612,11 @@ static float voice_factor(float *p_vector, float p_gain,
CELPMContext *ctx)
{
double p_ener = (double) ctx->dot_productf(p_vector, p_vector,
- AMRWB_SFR_SIZE) *
- p_gain * p_gain;
+ AMRWB_SFR_SIZE) *
+ p_gain * p_gain;
double f_ener = (double) ctx->dot_productf(f_vector, f_vector,
- AMRWB_SFR_SIZE) *
- f_gain * f_gain;
+ AMRWB_SFR_SIZE) *
+ f_gain * f_gain;
return (p_ener - f_ener) / (p_ener + f_ener);
}
@@ -785,7 +785,7 @@ static void synthesis(AMRWBContext *ctx, float *lpc, float *excitation,
if (ctx->pitch_gain[0] > 0.5 && ctx->fr_cur_mode <= MODE_8k85) {
int i;
float energy = ctx->celpm_ctx.dot_productf(excitation, excitation,
- AMRWB_SFR_SIZE);
+ AMRWB_SFR_SIZE);
// XXX: Weird part in both ref code and spec. A unknown parameter
// {beta} seems to be identical to the current pitch gain
@@ -846,8 +846,8 @@ static void upsample_5_4(float *out, const float *in, int o_size, CELPMContext *
for (k = 1; k < 5; k++) {
out[i] = ctx->dot_productf(in0 + int_part,
- upsample_fir[4 - frac_part],
- UPS_MEM_SIZE);
+ upsample_fir[4 - frac_part],
+ UPS_MEM_SIZE);
int_part++;
frac_part--;
i++;
@@ -893,7 +893,8 @@ static void scaled_hb_excitation(AMRWBContext *ctx, float *hb_exc,
const float *synth_exc, float hb_gain)
{
int i;
- float energy = ctx->celpm_ctx.dot_productf(synth_exc, synth_exc, AMRWB_SFR_SIZE);
+ float energy = ctx->celpm_ctx.dot_productf(synth_exc, synth_exc,
+ AMRWB_SFR_SIZE);
/* Generate a white-noise excitation */
for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
@@ -1189,8 +1190,8 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
ctx->fixed_gain[0] =
ff_amr_set_fixed_gain(fixed_gain_factor,
ctx->celpm_ctx.dot_productf(ctx->fixed_vector,
- ctx->fixed_vector,
- AMRWB_SFR_SIZE) /
+ ctx->fixed_vector,
+ AMRWB_SFR_SIZE) /
AMRWB_SFR_SIZE,
ctx->prediction_error,
ENERGY_MEAN, energy_pred_fac);
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index c67bbed681..09248cc5fe 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -142,8 +142,6 @@ void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
-float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
-
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
int len);
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
@@ -293,7 +291,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon;
}
- c->scalarproduct_float = ff_scalarproduct_float_neon;
c->vector_clipf = ff_vector_clipf_neon;
c->vector_clip_int32 = ff_vector_clip_int32_neon;
diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S
index 31ad72ff89..5e15d6f183 100644
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/dsputil_neon.S
@@ -531,19 +531,6 @@ function ff_add_pixels_clamped_neon, export=1
bx lr
endfunc
-function ff_scalarproduct_float_neon, export=1
- vmov.f32 q2, #0.0
-1: vld1.32 {q0},[r0,:128]!
- vld1.32 {q1},[r1,:128]!
- vmla.f32 q2, q0, q1
- subs r2, r2, #4
- bgt 1b
- vadd.f32 d0, d4, d5
- vpadd.f32 d0, d0, d0
-NOVFP vmov.32 r0, d0[0]
- bx lr
-endfunc
-
function ff_vector_clipf_neon, export=1
VFP vdup.32 q1, d0[1]
VFP vdup.32 q0, d0[0]
diff --git a/libavcodec/arm/h264pred_init_arm.c b/libavcodec/arm/h264pred_init_arm.c
index 31714d73d5..20c1fde845 100644
--- a/libavcodec/arm/h264pred_init_arm.c
+++ b/libavcodec/arm/h264pred_init_arm.c
@@ -21,6 +21,7 @@
#include <stdint.h>
#include "libavutil/arm/cpu.h"
+#include "libavcodec/avcodec.h"
#include "libavcodec/h264pred.h"
void ff_pred16x16_vert_neon(uint8_t *src, ptrdiff_t stride);
diff --git a/libavcodec/arm/vp3dsp_init_arm.c b/libavcodec/arm/vp3dsp_init_arm.c
index 90fc34b218..4ce75fe651 100644
--- a/libavcodec/arm/vp3dsp_init_arm.c
+++ b/libavcodec/arm/vp3dsp_init_arm.c
@@ -21,6 +21,7 @@
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/arm/cpu.h"
+#include "libavcodec/dsputil.h"
#include "libavcodec/vp3dsp.h"
void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data);
diff --git a/libavcodec/arm/vp8dsp_init_arm.c b/libavcodec/arm/vp8dsp_init_arm.c
index 5c84ed3231..d360ae3e6d 100644
--- a/libavcodec/arm/vp8dsp_init_arm.c
+++ b/libavcodec/arm/vp8dsp_init_arm.c
@@ -18,6 +18,7 @@
#include <stdint.h>
+#include "libavutil/attributes.h"
#include "libavutil/arm/cpu.h"
#include "libavcodec/vp8dsp.h"
#include "vp8dsp.h"
diff --git a/libavcodec/arm/vp8dsp_init_armv6.c b/libavcodec/arm/vp8dsp_init_armv6.c
index 85a803af83..c3d024f5cb 100644
--- a/libavcodec/arm/vp8dsp_init_armv6.c
+++ b/libavcodec/arm/vp8dsp_init_armv6.c
@@ -17,6 +17,8 @@
*/
#include <stdint.h>
+
+#include "libavutil/attributes.h"
#include "libavcodec/vp8dsp.h"
#include "vp8dsp.h"
diff --git a/libavcodec/arm/vp8dsp_init_neon.c b/libavcodec/arm/vp8dsp_init_neon.c
index dbe5b9f961..965243c3e4 100644
--- a/libavcodec/arm/vp8dsp_init_neon.c
+++ b/libavcodec/arm/vp8dsp_init_neon.c
@@ -17,6 +17,8 @@
*/
#include <stdint.h>
+
+#include "libavutil/attributes.h"
#include "libavcodec/vp8dsp.h"
#include "vp8dsp.h"
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 49e56cc0d4..98d4596d82 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -2483,17 +2483,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
-float ff_scalarproduct_float_c(const float *v1, const float *v2, int len)
-{
- float p = 0.0;
- int i;
-
- for (i = 0; i < len; i++)
- p += v1[i] * v2[i];
-
- return p;
-}
-
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
uint32_t maxi, uint32_t maxisign)
{
@@ -2875,7 +2864,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
c->apply_window_int16 = apply_window_int16_c;
c->vector_clip_int32 = vector_clip_int32_c;
- c->scalarproduct_float = ff_scalarproduct_float_c;
c->shrink[0]= av_image_copy_plane;
c->shrink[1]= ff_shrink22;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 8953d972bd..0b27ebd54b 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -360,13 +360,6 @@ typedef struct DSPContext {
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
- /**
- * Calculate the scalar product of two vectors of floats.
- * @param v1 first vector, 16-byte aligned
- * @param v2 second vector, 16-byte aligned
- * @param len length of vectors, multiple of 4
- */
- float (*scalarproduct_float)(const float *v1, const float *v2, int len);
/* (I)DCT */
void (*fdct)(DCTELEM *block/* align 16*/);
@@ -474,17 +467,6 @@ attribute_deprecated void dsputil_init(DSPContext* c, AVCodecContext *avctx);
int ff_check_alignment(void);
/**
- * Return the scalar product of two vectors.
- *
- * @param v1 first input vector
- * @param v2 first input vector
- * @param len number of elements
- *
- * @return sum of elementwise products
- */
-float ff_scalarproduct_float_c(const float *v1, const float *v2, int len);
-
-/**
* permute block according to permuatation.
* @param last last non zero element in scantable order
*/
diff --git a/libavcodec/qcelpdec.c b/libavcodec/qcelpdec.c
index aff872563c..b8e2290378 100644
--- a/libavcodec/qcelpdec.c
+++ b/libavcodec/qcelpdec.c
@@ -30,10 +30,10 @@
#include <stddef.h>
#include "libavutil/channel_layout.h"
+#include "libavutil/float_dsp.h"
#include "avcodec.h"
#include "internal.h"
#include "get_bits.h"
-#include "dsputil.h"
#include "qcelpdata.h"
#include "celp_filters.h"
#include "acelp_filters.h"
@@ -400,12 +400,10 @@ static void apply_gain_ctrl(float *v_out, const float *v_ref, const float *v_in)
{
int i;
- for (i = 0; i < 160; i += 40)
- ff_scale_vector_to_given_sum_of_squares(v_out + i, v_in + i,
- ff_scalarproduct_float_c(v_ref + i,
- v_ref + i,
- 40),
- 40);
+ for (i = 0; i < 160; i += 40) {
+ float res = avpriv_scalarproduct_float_c(v_ref + i, v_ref + i, 40);
+ ff_scale_vector_to_given_sum_of_squares(v_out + i, v_in + i, res, 40);
+ }
}
/**
@@ -680,8 +678,9 @@ static void postfilter(QCELPContext *q, float *samples, float *lpc)
ff_tilt_compensation(&q->postfilter_tilt_mem, 0.3, pole_out + 10, 160);
ff_adaptive_gain_control(samples, pole_out + 10,
- ff_scalarproduct_float_c(q->formant_mem + 10,
- q->formant_mem + 10, 160),
+ avpriv_scalarproduct_float_c(q->formant_mem + 10,
+ q->formant_mem + 10,
+ 160),
160, 0.9375, &q->postfilter_agc_mem);
}
diff --git a/libavcodec/ra288.c b/libavcodec/ra288.c
index 7f0e2f71a8..8bafc010e1 100644
--- a/libavcodec/ra288.c
+++ b/libavcodec/ra288.c
@@ -84,7 +84,7 @@ static av_cold int ra288_decode_init(AVCodecContext *avctx)
static void convolve(float *tgt, const float *src, int len, int n)
{
for (; n >= 0; n--)
- tgt[n] = ff_scalarproduct_float_c(src, src - n, len);
+ tgt[n] = avpriv_scalarproduct_float_c(src, src - n, len);
}
@@ -113,7 +113,7 @@ static void decode(RA288Context *ractx, float gain, int cb_coef)
for (i=0; i < 5; i++)
buffer[i] = codetable[cb_coef][i] * sumsum;
- sum = ff_scalarproduct_float_c(buffer, buffer, 5);
+ sum = avpriv_scalarproduct_float_c(buffer, buffer, 5);
sum = FFMAX(sum, 5. / (1<<24));
diff --git a/libavcodec/sipr.c b/libavcodec/sipr.c
index 06b22c0c31..1883908a32 100644
--- a/libavcodec/sipr.c
+++ b/libavcodec/sipr.c
@@ -26,11 +26,11 @@
#include <string.h>
#include "libavutil/channel_layout.h"
+#include "libavutil/float_dsp.h"
#include "libavutil/mathematics.h"
#include "avcodec.h"
#define BITSTREAM_READER_LE
#include "get_bits.h"
-#include "dsputil.h"
#include "internal.h"
#include "lsp.h"
@@ -411,9 +411,10 @@ static void decode_frame(SiprContext *ctx, SiprParameters *params,
convolute_with_sparse(fixed_vector, &fixed_cb, impulse_response,
SUBFR_SIZE);
- avg_energy =
- (0.01 + ff_scalarproduct_float_c(fixed_vector, fixed_vector, SUBFR_SIZE)) /
- SUBFR_SIZE;
+ avg_energy = (0.01 + avpriv_scalarproduct_float_c(fixed_vector,
+ fixed_vector,
+ SUBFR_SIZE)) /
+ SUBFR_SIZE;
ctx->past_pitch_gain = pitch_gain = gain_cb[params->gc_index[i]][0];
@@ -454,9 +455,9 @@ static void decode_frame(SiprContext *ctx, SiprParameters *params,
if (ctx->mode == MODE_5k0) {
for (i = 0; i < subframe_count; i++) {
- float energy = ff_scalarproduct_float_c(ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i * SUBFR_SIZE,
- ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i * SUBFR_SIZE,
- SUBFR_SIZE);
+ float energy = avpriv_scalarproduct_float_c(ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i * SUBFR_SIZE,
+ ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i * SUBFR_SIZE,
+ SUBFR_SIZE);
ff_adaptive_gain_control(&synth[i * SUBFR_SIZE],
&synth[i * SUBFR_SIZE], energy,
SUBFR_SIZE, 0.9, &ctx->postfilter_agc);
diff --git a/libavcodec/sipr16k.c b/libavcodec/sipr16k.c
index c2e090bb0a..fbf749759c 100644
--- a/libavcodec/sipr16k.c
+++ b/libavcodec/sipr16k.c
@@ -25,8 +25,8 @@
#include "sipr.h"
#include "libavutil/common.h"
+#include "libavutil/float_dsp.h"
#include "libavutil/mathematics.h"
-#include "dsputil.h"
#include "lsp.h"
#include "celp_filters.h"
#include "acelp_vectors.h"
@@ -163,11 +163,11 @@ static float acelp_decode_gain_codef(float gain_corr_factor, const float *fc_v,
const float *ma_prediction_coeff,
int subframe_size, int ma_pred_order)
{
- mr_energy +=
- ff_scalarproduct_float_c(quant_energy, ma_prediction_coeff, ma_pred_order);
+ mr_energy += avpriv_scalarproduct_float_c(quant_energy, ma_prediction_coeff,
+ ma_pred_order);
mr_energy = gain_corr_factor * exp(M_LN10 / 20. * mr_energy) /
- sqrt((0.01 + ff_scalarproduct_float_c(fc_v, fc_v, subframe_size)));
+ sqrt((0.01 + avpriv_scalarproduct_float_c(fc_v, fc_v, subframe_size)));
return mr_energy;
}
diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 0ae3748911..c3b6ab3b5f 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -30,8 +30,8 @@
#include <math.h>
#include "libavutil/channel_layout.h"
+#include "libavutil/float_dsp.h"
#include "libavutil/mem.h"
-#include "dsputil.h"
#include "avcodec.h"
#include "internal.h"
#include "get_bits.h"
@@ -523,7 +523,7 @@ static int kalman_smoothen(WMAVoiceContext *s, int pitch,
/* find best fitting point in history */
do {
- dot = ff_scalarproduct_float_c(in, ptr, size);
+ dot = avpriv_scalarproduct_float_c(in, ptr, size);
if (dot > optimal_gain) {
optimal_gain = dot;
best_hist_ptr = ptr;
@@ -532,7 +532,7 @@ static int kalman_smoothen(WMAVoiceContext *s, int pitch,
if (optimal_gain <= 0)
return -1;
- dot = ff_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);
+ dot = avpriv_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);
if (dot <= 0) // would be 1.0
return -1;
@@ -562,8 +562,8 @@ static float tilt_factor(const float *lpcs, int n_lpcs)
{
float rh0, rh1;
- rh0 = 1.0 + ff_scalarproduct_float_c(lpcs, lpcs, n_lpcs);
- rh1 = lpcs[0] + ff_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1);
+ rh0 = 1.0 + avpriv_scalarproduct_float_c(lpcs, lpcs, n_lpcs);
+ rh1 = lpcs[0] + avpriv_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1);
return rh1 / rh0;
}
@@ -656,7 +656,8 @@ static void calc_input_response(WMAVoiceContext *s, float *lpcs,
-1.8 * tilt_factor(coeffs, remainder - 1),
coeffs, remainder);
}
- sq = (1.0 / 64.0) * sqrtf(1 / ff_scalarproduct_float_c(coeffs, coeffs, remainder));
+ sq = (1.0 / 64.0) * sqrtf(1 / avpriv_scalarproduct_float_c(coeffs, coeffs,
+ remainder));
for (n = 0; n < remainder; n++)
coeffs[n] *= sq;
}
@@ -1320,7 +1321,8 @@ static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
/* Calculate gain for adaptive & fixed codebook signal.
* see ff_amr_set_fixed_gain(). */
idx = get_bits(gb, 7);
- fcb_gain = expf(ff_scalarproduct_float_c(s->gain_pred_err, gain_coeff, 6) -
+ fcb_gain = expf(avpriv_scalarproduct_float_c(s->gain_pred_err,
+ gain_coeff, 6) -
5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
acb_gain = wmavoice_gain_codebook_acb[idx];
pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm
index c9462a4bf4..98fed1734f 100644
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@@ -463,32 +463,6 @@ cglobal add_hfyu_left_prediction, 3,3,7, dst, src, w, left
.src_unaligned:
ADD_HFYU_LEFT_LOOP 0, 0
-
-; float scalarproduct_float_sse(const float *v1, const float *v2, int len)
-INIT_XMM sse
-cglobal scalarproduct_float, 3,3,2, v1, v2, offset
- neg offsetq
- shl offsetq, 2
- sub v1q, offsetq
- sub v2q, offsetq
- xorps xmm0, xmm0
- .loop:
- movaps xmm1, [v1q+offsetq]
- mulps xmm1, [v2q+offsetq]
- addps xmm0, xmm1
- add offsetq, 16
- js .loop
- movhlps xmm1, xmm0
- addps xmm0, xmm1
- movss xmm1, xmm0
- shufps xmm0, xmm0, 1
- addss xmm0, xmm1
-%if ARCH_X86_64 == 0
- movss r0m, xmm0
- fld dword r0m
-%endif
- RET
-
;-----------------------------------------------------------------------------
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
; int32_t max, unsigned int len)
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index c6d9939d8a..de416ef001 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -1987,8 +1987,6 @@ int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src,
int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src,
int w, int left);
-float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
-
void ff_vector_clip_int32_mmx (int32_t *dst, const int32_t *src,
int32_t min, int32_t max, unsigned int len);
void ff_vector_clip_int32_sse2 (int32_t *dst, const int32_t *src,
@@ -2252,8 +2250,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
#endif /* HAVE_INLINE_ASM */
#if HAVE_YASM
- c->scalarproduct_float = ff_scalarproduct_float_sse;
-
#if HAVE_INLINE_ASM && CONFIG_VIDEODSP
c->gmc = gmc_sse;
#endif
diff --git a/libavutil/arm/float_dsp_init_neon.c b/libavutil/arm/float_dsp_init_neon.c
index b3644e82a2..a7245ad92b 100644
--- a/libavutil/arm/float_dsp_init_neon.c
+++ b/libavutil/arm/float_dsp_init_neon.c
@@ -43,6 +43,8 @@ void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
void ff_butterflies_float_neon(float *v1, float *v2, int len);
+float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
+
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
{
fdsp->vector_fmul = ff_vector_fmul_neon;
@@ -52,4 +54,5 @@ void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
fdsp->vector_fmul_add = ff_vector_fmul_add_neon;
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
fdsp->butterflies_float = ff_butterflies_float_neon;
+ fdsp->scalarproduct_float = ff_scalarproduct_float_neon;
}
diff --git a/libavutil/arm/float_dsp_neon.S b/libavutil/arm/float_dsp_neon.S
index 4acc406d33..559b565628 100644
--- a/libavutil/arm/float_dsp_neon.S
+++ b/libavutil/arm/float_dsp_neon.S
@@ -256,3 +256,16 @@ function ff_butterflies_float_neon, export=1
bgt 1b
bx lr
endfunc
+
+function ff_scalarproduct_float_neon, export=1
+ vmov.f32 q2, #0.0
+1: vld1.32 {q0},[r0,:128]!
+ vld1.32 {q1},[r1,:128]!
+ vmla.f32 q2, q0, q1
+ subs r2, r2, #4
+ bgt 1b
+ vadd.f32 d0, d4, d5
+ vpadd.f32 d0, d0, d0
+NOVFP vmov.32 r0, d0[0]
+ bx lr
+endfunc
diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
index 05688e4c3f..50c9e60c32 100644
--- a/libavutil/float_dsp.c
+++ b/libavutil/float_dsp.c
@@ -104,6 +104,17 @@ static void butterflies_float_c(float *restrict v1, float *restrict v2,
}
}
+float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
+{
+ float p = 0.0;
+ int i;
+
+ for (i = 0; i < len; i++)
+ p += v1[i] * v2[i];
+
+ return p;
+}
+
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
{
fdsp->vector_fmul = vector_fmul_c;
@@ -114,6 +125,7 @@ void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
fdsp->vector_fmul_add = vector_fmul_add_c;
fdsp->vector_fmul_reverse = vector_fmul_reverse_c;
fdsp->butterflies_float = butterflies_float_c;
+ fdsp->scalarproduct_float = avpriv_scalarproduct_float_c;
#if ARCH_ARM
ff_float_dsp_init_arm(fdsp);
diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
index ff83beddbe..6cc7e76c11 100644
--- a/libavutil/float_dsp.h
+++ b/libavutil/float_dsp.h
@@ -146,9 +146,31 @@ typedef struct AVFloatDSPContext {
* @param len length of vectors, multiple of 4
*/
void (*butterflies_float)(float *restrict v1, float *restrict v2, int len);
+
+ /**
+ * Calculate the scalar product of two vectors of floats.
+ *
+ * @param v1 first vector, 16-byte aligned
+ * @param v2 second vector, 16-byte aligned
+ * @param len length of vectors, multiple of 4
+ *
+ * @return sum of elementwise products
+ */
+ float (*scalarproduct_float)(const float *v1, const float *v2, int len);
} AVFloatDSPContext;
/**
+ * Return the scalar product of two vectors.
+ *
+ * @param v1 first input vector
+ * @param v2 second input vector
+ * @param len number of elements
+ *
+ * @return sum of elementwise products
+ */
+float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len);
+
+/**
* Initialize a float DSP context.
*
* @param fdsp float DSP context
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index 3e5e91ad07..004e6cf1fe 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -236,4 +236,30 @@ VECTOR_FMUL_REVERSE
%if HAVE_AVX_EXTERNAL
INIT_YMM avx
VECTOR_FMUL_REVERSE
-%endif \ No newline at end of file
+%endif
+
+; float scalarproduct_float_sse(const float *v1, const float *v2, int len)
+INIT_XMM sse
+cglobal scalarproduct_float, 3,3,2, v1, v2, offset
+ neg offsetq
+ shl offsetq, 2
+ sub v1q, offsetq
+ sub v2q, offsetq
+ xorps xmm0, xmm0
+.loop:
+ movaps xmm1, [v1q+offsetq]
+ mulps xmm1, [v2q+offsetq]
+ addps xmm0, xmm1
+ add offsetq, 16
+ js .loop
+ movhlps xmm1, xmm0
+ addps xmm0, xmm1
+ movss xmm1, xmm0
+ shufps xmm0, xmm0, 1
+ addss xmm0, xmm1
+%if ARCH_X86_64 == 0
+ movss r0m, xmm0
+ fld dword r0m
+%endif
+ RET
+
diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index 9c58e2bc30..5c6383bc74 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -51,6 +51,8 @@ void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
void ff_vector_fmul_reverse_avx(float *dst, const float *src0,
const float *src1, int len);
+float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
+
#if HAVE_6REGS && HAVE_INLINE_ASM
static void vector_fmul_window_3dnowext(float *dst, const float *src0,
const float *src1, const float *win,
@@ -135,6 +137,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
fdsp->vector_fmul_add = ff_vector_fmul_add_sse;
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
+ fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
}
if (EXTERNAL_SSE2(mm_flags)) {
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;