#include <math.h>
#include <string.h>
+#include <stdint.h>
#include <pulse/xmalloc.h>
a->hangover = 0;
memset(a->x, 0, sizeof(a->x));
memset(a->xf, 0, sizeof(a->xf));
- memset(a->w, 0, sizeof(a->w));
+ memset(a->w_arr, 0, sizeof(a->w_arr));
a->j = NLMS_EXT;
a->delta = 0.0f;
AEC_setambient(a, NoiseFloor);
a->dumpcnt = 0;
memset(a->ws, 0, sizeof(a->ws));
- if (have_vector)
+ if (have_vector) {
+ /* Round w_arr up to the next 16-byte boundary; the 16 spare elements in w_arr leave room for the shift */
+ a->w = (REAL *) ((((uintptr_t) a->w_arr) + 15) & ~(uintptr_t) 15);
a->dotp = dotp_sse;
- else
+ } else {
+ /* We don't care about alignment, just use the array as-is */
+ a->w = a->w_arr;
a->dotp = dotp;
+ }
return a;
}
// NLMS-pw
REAL x[NLMS_LEN + NLMS_EXT]; // tap delayed loudspeaker signal
REAL xf[NLMS_LEN + NLMS_EXT]; // pre-whitening tap delayed signal
- PA_DECLARE_ALIGNED(16, REAL, w[NLMS_LEN]); // tap weights
+ REAL w_arr[NLMS_LEN+16]; // tap weights
+ REAL *w; // this will be a 16-byte aligned pointer into w_arr
int j; // optimize: less memory copies
double dotp_xf_xf; // double to avoid loss of precision
float delta; // noise floor to stabilize NLMS