1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
3 * Copyright (c) 2008 Beyond Access, Inc. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
17 * THIS SOFTWARE IS PROVIDED BY BEYOND ACCESS, INC. ``AS IS'' AND ANY
18 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BEYOND ACCESS, INC. NOR
21 * ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 * @file yin.c Implementation of pitch extraction.
32 * @author David Huggins-Daines <dhuggins@cs.cmu.edu>
35 /* This implements part of the YIN algorithm:
37 * "YIN, a fundamental frequency estimator for speech and music".
38 * Alain de Cheveigné and Hideki Kawahara. Journal of the Acoustical
39 * Society of America, 111 (4), April 2002.
42 #include "sphinxbase/prim_type.h"
43 #include "sphinxbase/ckd_alloc.h"
44 #include "sphinxbase/fixpoint.h"
46 #include "sphinxbase/yin.h"
51 uint16 frame_size; /** Size of analysis frame. */
52 uint16 search_threshold; /**< Threshold for finding period, in Q15 */
53 uint16 search_range; /**< Range around best local estimate to search, in Q15 */
54 uint16 nfr; /**< Number of frames read so far. */
56 unsigned char wsize; /**< Size of smoothing window. */
57 unsigned char wstart; /**< First frame in window. */
58 unsigned char wcur; /**< Current frame of analysis. */
59 unsigned char endut; /**< Hoch Hech! Are we at the utterance end? */
61 fixed32 **diff_window; /**< Window of difference function outputs. */
62 uint16 *period_window; /**< Window of best period estimates. */
66 * The core of YIN: cumulative mean normalized difference function.
69 cmn_diff(int16 const *signal, int32 *out_diff, int ndiff)
78 /* Determine how many bits we can scale t up by below. */
79 for (tscale = 0; tscale < 32; ++tscale)
80 if (ndiff & (1<<(31-tscale)))
82 --tscale; /* Avoid teh overflowz. */
83 /* printf("tscale is %d (ndiff - 1) << tscale is %d\n",
84 tscale, (ndiff-1) << tscale); */
86 /* Somewhat elaborate block floating point implementation.
87 * The fp implementation of this is really a lot simpler. */
88 for (t = 1; t < ndiff; ++t) {
89 uint32 dd, dshift, norm;
94 for (j = 0; j < ndiff; ++j) {
95 int diff = signal[j] - signal[t + j];
96 /* Guard against overflows. */
97 if (dd > (1UL<<tscale)) {
101 dd += (diff * diff) >> dshift;
103 /* Make sure the diffs and cum are shifted to the same
104 * scaling factor (usually dshift will be zero) */
105 if (dshift > cshift) {
106 cum += dd << (dshift-cshift);
109 cum += dd >> (cshift-dshift);
112 /* Guard against overflows and also ensure that (t<<tscale) > cum. */
113 while (cum > (1UL<<tscale)) {
117 /* Avoid divide-by-zero! */
118 if (cum == 0) cum = 1;
119 /* Calculate the normalizer in high precision. */
120 norm = (t << tscale) / cum;
121 /* Do a long multiply and shift down to Q15. */
122 out_diff[t] = (int32)(((long long)dd * norm)
123 >> (tscale - 15 + cshift - dshift));
124 /* printf("dd %d cshift %d dshift %d scaledt %d cum %d norm %d cmn %d\n",
125 dd, cshift, dshift, (t<<tscale), cum, norm, out_diff[t]); */
130 yin_init(int frame_size, float search_threshold,
131 float search_range, int smooth_window)
135 pe = ckd_calloc(1, sizeof(*pe));
136 pe->frame_size = frame_size;
137 pe->search_threshold = (uint16)(search_threshold * 32768);
138 pe->search_range = (uint16)(search_range * 32768);
139 pe->wsize = smooth_window * 2 + 1;
140 pe->diff_window = ckd_calloc_2d(pe->wsize,
142 sizeof(**pe->diff_window));
143 pe->period_window = ckd_calloc(pe->wsize,
144 sizeof(*pe->period_window));
151 ckd_free_2d(pe->diff_window);
152 ckd_free(pe->period_window);
159 /* Reset the circular window pointers. */
160 pe->wstart = pe->endut = 0;
171 thresholded_search(int32 *diff_window, fixed32 threshold, int start, int end)
177 for (i = start; i < end; ++i) {
178 int diff = diff_window[i];
180 if (diff < threshold) {
194 yin_write(yin_t *pe, int16 const *frame)
198 /* Rotate the window one frame forward. */
200 /* Fill in the frame before wstart. */
201 outptr = pe->wstart - 1;
202 /* Wrap around the window pointer. */
203 if (pe->wstart == pe->wsize)
206 /* Now calculate normalized difference function. */
207 difflen = pe->frame_size / 2;
208 cmn_diff(frame, pe->diff_window[outptr], difflen);
210 /* Find the first point under threshold. If not found, then
211 * use the absolute minimum. */
212 pe->period_window[outptr]
213 = thresholded_search(pe->diff_window[outptr],
214 pe->search_threshold, 0, difflen);
216 /* Increment total number of frames. */
221 yin_read(yin_t *pe, uint16 *out_period, uint16 *out_bestdiff)
223 int wstart, wlen, half_wsize, i;
224 int best, best_diff, search_width, low_period, high_period;
226 half_wsize = (pe->wsize-1)/2;
227 /* Without any smoothing, just return the current value (don't
228 * need to do anything to the current poitner either). */
229 if (half_wsize == 0) {
232 *out_period = pe->period_window[0];
233 *out_bestdiff = pe->diff_window[0][pe->period_window[0]];
237 /* We can't do anything unless we have at least (wsize-1)/2 + 1
238 * frames, unless we're at the end of the utterance. */
239 if (pe->endut == 0 && pe->nfr < half_wsize + 1) {
240 /* Don't increment the current pointer either. */
244 /* Establish the smoothing window. */
245 /* End of utterance. */
247 /* We are done (no more data) when pe->wcur = pe->wstart. */
248 if (pe->wcur == pe->wstart)
250 /* I.e. pe->wcur (circular minus) half_wsize. */
251 wstart = (pe->wcur + pe->wsize - half_wsize) % pe->wsize;
252 /* Number of frames from wstart up to pe->wstart. */
253 wlen = pe->wstart - wstart;
254 if (wlen < 0) wlen += pe->wsize;
255 /*printf("ENDUT! ");*/
257 /* Beginning of utterance. */
258 else if (pe->nfr < pe->wsize) {
262 /* Normal case, it is what it is. */
268 /* Now (finally) look for the best local estimate. */
269 /* printf("Searching for local estimate in %d frames around %d\n",
270 wlen, pe->nfr + 1 - wlen); */
271 best = pe->period_window[pe->wcur];
272 best_diff = pe->diff_window[pe->wcur][best];
273 for (i = 0; i < wlen; ++i) {
278 diff = pe->diff_window[j][pe->period_window[j]];
279 /* printf("%.2f,%.2f ", 1.0 - (double)diff/32768,
280 pe->period_window[j] ? 8000.0/pe->period_window[j] : 8000.0); */
281 if (diff < best_diff) {
283 best = pe->period_window[j];
286 /* printf("best: %.2f, %.2f\n", 1.0 - (double)best_diff/32768,
287 best ? 8000.0/best : 8000.0); */
288 /* If it's the same as the current one then return it. */
289 if (best == pe->period_window[pe->wcur]) {
290 /* Increment the current pointer. */
291 if (++pe->wcur == pe->wsize)
294 *out_bestdiff = best_diff;
297 /* Otherwise, redo the search inside a narrower range. */
298 search_width = best * pe->search_range / 32768;
299 /* printf("Search width = %d * %.2f = %d\n",
300 best, (double)pe->search_range/32768, search_width); */
301 if (search_width == 0) search_width = 1;
302 low_period = best - search_width;
303 high_period = best + search_width;
304 if (low_period < 0) low_period = 0;
305 if (high_period > pe->frame_size / 2) high_period = pe->frame_size / 2;
306 /* printf("Searching from %d to %d\n", low_period, high_period); */
307 best = thresholded_search(pe->diff_window[pe->wcur],
308 pe->search_threshold,
309 low_period, high_period);
310 best_diff = pe->diff_window[pe->wcur][best];
313 *out_period = (best > 65535) ? 65535 : best;
315 *out_bestdiff = (best_diff > 65535) ? 65535 : best_diff;
317 /* Increment the current pointer. */
318 if (++pe->wcur == pe->wsize)