SphinxBase 5prealpha
fe_interface.c
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1996-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37#include <stdio.h>
38#include <string.h>
39#include <math.h>
40#include <stdlib.h>
41#include <assert.h>
42
43#ifdef HAVE_CONFIG_H
44#include <config.h>
45#endif
46
48#include "sphinxbase/byteorder.h"
49#include "sphinxbase/fixpoint.h"
50#include "sphinxbase/genrand.h"
51#include "sphinxbase/err.h"
52#include "sphinxbase/cmd_ln.h"
54
55#include "fe_internal.h"
56#include "fe_warp.h"
57
58static const arg_t fe_args[] = {
59 waveform_to_cepstral_command_line_macro(),
60 { NULL, 0, NULL, NULL }
61};
62
63int
64fe_parse_general_params(cmd_ln_t *config, fe_t * fe)
65{
66 int j, frate;
67
68 fe->config = config;
69 fe->sampling_rate = cmd_ln_float32_r(config, "-samprate");
70 frate = cmd_ln_int32_r(config, "-frate");
71 if (frate > MAX_INT16 || frate > fe->sampling_rate || frate < 1) {
73 ("Frame rate %d can not be bigger than sample rate %.02f\n",
74 frate, fe->sampling_rate);
75 return -1;
76 }
77
78 fe->frame_rate = (int16)frate;
79 if (cmd_ln_boolean_r(config, "-dither")) {
80 fe->dither = 1;
81 fe->dither_seed = cmd_ln_int32_r(config, "-seed");
82 }
83#ifdef WORDS_BIGENDIAN
84 fe->swap = strcmp("big", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1;
85#else
86 fe->swap = strcmp("little", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1;
87#endif
88 fe->window_length = cmd_ln_float32_r(config, "-wlen");
89 fe->pre_emphasis_alpha = cmd_ln_float32_r(config, "-alpha");
90
91 fe->num_cepstra = (uint8)cmd_ln_int32_r(config, "-ncep");
92 fe->fft_size = (int16)cmd_ln_int32_r(config, "-nfft");
93
94 /* Check FFT size, compute FFT order (log_2(n)) */
95 for (j = fe->fft_size, fe->fft_order = 0; j > 1; j >>= 1, fe->fft_order++) {
96 if (((j % 2) != 0) || (fe->fft_size <= 0)) {
97 E_ERROR("fft: number of points must be a power of 2 (is %d)\n",
98 fe->fft_size);
99 return -1;
100 }
101 }
102 /* Verify that FFT size is greater or equal to window length. */
103 if (fe->fft_size < (int)(fe->window_length * fe->sampling_rate)) {
104 E_ERROR("FFT: Number of points must be greater or equal to frame size (%d samples)\n",
105 (int)(fe->window_length * fe->sampling_rate));
106 return -1;
107 }
108
109 fe->pre_speech = (int16)cmd_ln_int32_r(config, "-vad_prespeech");
110 fe->post_speech = (int16)cmd_ln_int32_r(config, "-vad_postspeech");
111 fe->start_speech = (int16)cmd_ln_int32_r(config, "-vad_startspeech");
112 fe->vad_threshold = cmd_ln_float32_r(config, "-vad_threshold");
113
114 fe->remove_dc = cmd_ln_boolean_r(config, "-remove_dc");
115 fe->remove_noise = cmd_ln_boolean_r(config, "-remove_noise");
116 fe->remove_silence = cmd_ln_boolean_r(config, "-remove_silence");
117
118 if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "dct"))
119 fe->transform = DCT_II;
120 else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "legacy"))
121 fe->transform = LEGACY_DCT;
122 else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "htk"))
123 fe->transform = DCT_HTK;
124 else {
125 E_ERROR("Invalid transform type (values are 'dct', 'legacy', 'htk')\n");
126 return -1;
127 }
128
129 if (cmd_ln_boolean_r(config, "-logspec"))
130 fe->log_spec = RAW_LOG_SPEC;
131 if (cmd_ln_boolean_r(config, "-smoothspec"))
132 fe->log_spec = SMOOTH_LOG_SPEC;
133
134 return 0;
135}
136
137static int
138fe_parse_melfb_params(cmd_ln_t *config, fe_t *fe, melfb_t * mel)
139{
140 mel->sampling_rate = fe->sampling_rate;
141 mel->fft_size = fe->fft_size;
142 mel->num_cepstra = fe->num_cepstra;
143 mel->num_filters = cmd_ln_int32_r(config, "-nfilt");
144
145 if (fe->log_spec)
146 fe->feature_dimension = mel->num_filters;
147 else
148 fe->feature_dimension = fe->num_cepstra;
149
150 mel->upper_filt_freq = cmd_ln_float32_r(config, "-upperf");
151 mel->lower_filt_freq = cmd_ln_float32_r(config, "-lowerf");
152
153 mel->doublewide = cmd_ln_boolean_r(config, "-doublebw");
154
155 mel->warp_type = cmd_ln_str_r(config, "-warp_type");
156 mel->warp_params = cmd_ln_str_r(config, "-warp_params");
157 mel->lifter_val = cmd_ln_int32_r(config, "-lifter");
158
159 mel->unit_area = cmd_ln_boolean_r(config, "-unit_area");
160 mel->round_filters = cmd_ln_boolean_r(config, "-round_filters");
161
162 if (fe_warp_set(mel, mel->warp_type) != FE_SUCCESS) {
163 E_ERROR("Failed to initialize the warping function.\n");
164 return -1;
165 }
166 fe_warp_set_parameters(mel, mel->warp_params, mel->sampling_rate);
167 return 0;
168}
169
170void
171fe_print_current(fe_t const *fe)
172{
173 E_INFO("Current FE Parameters:\n");
174 E_INFO("\tSampling Rate: %f\n", fe->sampling_rate);
175 E_INFO("\tFrame Size: %d\n", fe->frame_size);
176 E_INFO("\tFrame Shift: %d\n", fe->frame_shift);
177 E_INFO("\tFFT Size: %d\n", fe->fft_size);
178 E_INFO("\tLower Frequency: %g\n",
179 fe->mel_fb->lower_filt_freq);
180 E_INFO("\tUpper Frequency: %g\n",
181 fe->mel_fb->upper_filt_freq);
182 E_INFO("\tNumber of filters: %d\n", fe->mel_fb->num_filters);
183 E_INFO("\tNumber of Overflow Samps: %d\n", fe->num_overflow_samps);
184 E_INFO("Will %sremove DC offset at frame level\n",
185 fe->remove_dc ? "" : "not ");
186 if (fe->dither) {
187 E_INFO("Will add dither to audio\n");
188 E_INFO("Dither seeded with %d\n", fe->dither_seed);
189 }
190 else {
191 E_INFO("Will not add dither to audio\n");
192 }
193 if (fe->mel_fb->lifter_val) {
194 E_INFO("Will apply sine-curve liftering, period %d\n",
195 fe->mel_fb->lifter_val);
196 }
197 E_INFO("Will %snormalize filters to unit area\n",
198 fe->mel_fb->unit_area ? "" : "not ");
199 E_INFO("Will %sround filter frequencies to DFT points\n",
200 fe->mel_fb->round_filters ? "" : "not ");
201 E_INFO("Will %suse double bandwidth in mel filter\n",
202 fe->mel_fb->doublewide ? "" : "not ");
203}
204
205fe_t *
206fe_init_auto()
207{
208 return fe_init_auto_r(cmd_ln_get());
209}
210
211fe_t *
212fe_init_auto_r(cmd_ln_t *config)
213{
214 fe_t *fe;
215 int prespch_frame_len;
216
217 fe = (fe_t*)ckd_calloc(1, sizeof(*fe));
218 fe->refcount = 1;
219
220 /* transfer params to front end */
221 if (fe_parse_general_params(cmd_ln_retain(config), fe) < 0) {
222 fe_free(fe);
223 return NULL;
224 }
225
226 /* compute remaining fe parameters */
227 /* We add 0.5 so approximate the float with the closest
228 * integer. E.g., 2.3 is truncate to 2, whereas 3.7 becomes 4
229 */
230 fe->frame_shift = (int32) (fe->sampling_rate / fe->frame_rate + 0.5);
231 fe->frame_size = (int32) (fe->window_length * fe->sampling_rate + 0.5);
232 fe->pre_emphasis_prior = 0;
233
234 fe_start_stream(fe);
235
236 assert (fe->frame_shift > 1);
237
238 if (fe->frame_size < fe->frame_shift) {
239 E_ERROR
240 ("Frame size %d (-wlen) must be greater than frame shift %d (-frate)\n",
241 fe->frame_size, fe->frame_shift);
242 fe_free(fe);
243 return NULL;
244 }
245
246
247 if (fe->frame_size > (fe->fft_size)) {
248 E_ERROR
249 ("Number of FFT points has to be a power of 2 higher than %d, it is %d\n",
250 fe->frame_size, fe->fft_size);
251 fe_free(fe);
252 return NULL;
253 }
254
255 if (fe->dither)
256 fe_init_dither(fe->dither_seed);
257
258 /* establish buffers for overflow samps and hamming window */
259 fe->overflow_samps = ckd_calloc(fe->frame_size, sizeof(int16));
260 fe->hamming_window = ckd_calloc(fe->frame_size/2, sizeof(window_t));
261
262 /* create hamming window */
263 fe_create_hamming(fe->hamming_window, fe->frame_size);
264
265 /* init and fill appropriate filter structure */
266 fe->mel_fb = ckd_calloc(1, sizeof(*fe->mel_fb));
267
268 /* transfer params to mel fb */
269 fe_parse_melfb_params(config, fe, fe->mel_fb);
270
271 if (fe->mel_fb->upper_filt_freq > fe->sampling_rate / 2 + 1.0) {
272 E_ERROR("Upper frequency %.1f is higher than samprate/2 (%.1f)\n",
273 fe->mel_fb->upper_filt_freq, fe->sampling_rate / 2);
274 fe_free(fe);
275 return NULL;
276 }
277
278 fe_build_melfilters(fe->mel_fb);
279
280 fe_compute_melcosine(fe->mel_fb);
281 if (fe->remove_noise || fe->remove_silence)
282 fe->noise_stats = fe_init_noisestats(fe->mel_fb->num_filters);
283
284 fe->vad_data = (vad_data_t*)ckd_calloc(1, sizeof(*fe->vad_data));
285 prespch_frame_len = fe->log_spec != RAW_LOG_SPEC ? fe->num_cepstra : fe->mel_fb->num_filters;
286 fe->vad_data->prespch_buf = fe_prespch_init(fe->pre_speech + 1, prespch_frame_len, fe->frame_shift);
287
288 /* Create temporary FFT, spectrum and mel-spectrum buffers. */
289 /* FIXME: Gosh there are a lot of these. */
290 fe->spch = ckd_calloc(fe->frame_size, sizeof(*fe->spch));
291 fe->frame = ckd_calloc(fe->fft_size, sizeof(*fe->frame));
292 fe->spec = ckd_calloc(fe->fft_size, sizeof(*fe->spec));
293 fe->mfspec = ckd_calloc(fe->mel_fb->num_filters, sizeof(*fe->mfspec));
294
295 /* create twiddle factors */
296 fe->ccc = ckd_calloc(fe->fft_size / 4, sizeof(*fe->ccc));
297 fe->sss = ckd_calloc(fe->fft_size / 4, sizeof(*fe->sss));
298 fe_create_twiddle(fe);
299
300 if (cmd_ln_boolean_r(config, "-verbose")) {
301 fe_print_current(fe);
302 }
303
304 /*** Initialize the overflow buffers ***/
305 fe_start_utt(fe);
306 return fe;
307}
308
309arg_t const *
310fe_get_args(void)
311{
312 return fe_args;
313}
314
315const cmd_ln_t *
316fe_get_config(fe_t *fe)
317{
318 return fe->config;
319}
320
321void
322fe_init_dither(int32 seed)
323{
324 E_INFO("Using %d as the seed.\n", seed);
325 s3_rand_seed(seed);
326}
327
328static void
329fe_reset_vad_data(vad_data_t * vad_data)
330{
331 vad_data->in_speech = 0;
332 vad_data->pre_speech_frames = 0;
333 vad_data->post_speech_frames = 0;
334 fe_prespch_reset_cep(vad_data->prespch_buf);
335}
336
337int32
338fe_start_utt(fe_t * fe)
339{
340 fe->num_overflow_samps = 0;
341 memset(fe->overflow_samps, 0, fe->frame_size * sizeof(int16));
342 fe->pre_emphasis_prior = 0;
343 fe_reset_vad_data(fe->vad_data);
344 return 0;
345}
346
347void
348fe_start_stream(fe_t *fe)
349{
350 fe->num_processed_samps = 0;
351 fe_reset_noisestats(fe->noise_stats);
352}
353
354int
355fe_get_output_size(fe_t *fe)
356{
357 return (int)fe->feature_dimension;
358}
359
360void
361fe_get_input_size(fe_t *fe, int *out_frame_shift,
362 int *out_frame_size)
363{
364 if (out_frame_shift)
365 *out_frame_shift = fe->frame_shift;
366 if (out_frame_size)
367 *out_frame_size = fe->frame_size;
368}
369
370uint8
371fe_get_vad_state(fe_t *fe)
372{
373 return fe->vad_data->in_speech;
374}
375
376int
377fe_process_frames(fe_t *fe,
378 int16 const **inout_spch,
379 size_t *inout_nsamps,
380 mfcc_t **buf_cep,
381 int32 *inout_nframes,
382 int32 *out_frameidx)
383{
384 return fe_process_frames_ext(fe, inout_spch, inout_nsamps, buf_cep, inout_nframes, NULL, NULL, out_frameidx);
385}
386
387
391static int
392fe_copy_from_prespch(fe_t *fe, int32 *inout_nframes, mfcc_t **buf_cep, int outidx)
393{
394 while ((*inout_nframes) > 0 && fe_prespch_read_cep(fe->vad_data->prespch_buf, buf_cep[outidx]) > 0) {
395 outidx++;
396 (*inout_nframes)--;
397 }
398 return outidx;
399}
400
404static int
405fe_check_prespeech(fe_t *fe, int32 *inout_nframes, mfcc_t **buf_cep, int outidx, int32 *out_frameidx, size_t *inout_nsamps, int orig_nsamps)
406{
407 if (fe->vad_data->in_speech) {
408 if (fe_prespch_ncep(fe->vad_data->prespch_buf) > 0) {
409
410 /* Previous frame triggered vad into speech state. Last frame is in the end of
411 prespeech buffer, so overwrite it */
412 outidx = fe_copy_from_prespch(fe, inout_nframes, buf_cep, outidx);
413
414 /* Sets the start frame for the returned data so that caller can update timings */
415 if (out_frameidx) {
416 *out_frameidx = (fe->num_processed_samps + orig_nsamps - *inout_nsamps) / fe->frame_shift - fe->pre_speech;
417 }
418 } else {
419 outidx++;
420 (*inout_nframes)--;
421 }
422 }
423 /* Amount of data behind the original input which is still needed. */
424 if (fe->num_overflow_samps > 0)
425 fe->num_overflow_samps -= fe->frame_shift;
426
427 return outidx;
428}
429
430int
431fe_process_frames_ext(fe_t *fe,
432 int16 const **inout_spch,
433 size_t *inout_nsamps,
434 mfcc_t **buf_cep,
435 int32 *inout_nframes,
436 int16 *voiced_spch,
437 int32 *voiced_spch_nsamps,
438 int32 *out_frameidx)
439{
440 int outidx, n_overflow, orig_n_overflow;
441 int16 const *orig_spch;
442 size_t orig_nsamps;
443
444 /* The logic here is pretty complex, please be careful with modifications */
445
446 /* FIXME: Dump PCM data if needed */
447
448 /* In the special case where there is no output buffer, return the
449 * maximum number of frames which would be generated. */
450 if (buf_cep == NULL) {
451 if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size)
452 *inout_nframes = 0;
453 else
454 *inout_nframes = 1
455 + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size)
456 / fe->frame_shift);
457 if (!fe->vad_data->in_speech)
458 *inout_nframes += fe_prespch_ncep(fe->vad_data->prespch_buf);
459 return *inout_nframes;
460 }
461
462 if (out_frameidx)
463 *out_frameidx = 0;
464
465 /* Are there not enough samples to make at least 1 frame? */
466 if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size) {
467 if (*inout_nsamps > 0) {
468 /* Append them to the overflow buffer. */
469 memcpy(fe->overflow_samps + fe->num_overflow_samps,
470 *inout_spch, *inout_nsamps * (sizeof(int16)));
471 fe->num_overflow_samps += *inout_nsamps;
472 fe->num_processed_samps += *inout_nsamps;
473 *inout_spch += *inout_nsamps;
474 *inout_nsamps = 0;
475 }
476 /* We produced no frames of output, sorry! */
477 *inout_nframes = 0;
478 return 0;
479 }
480
481 /* Can't write a frame? Then do nothing! */
482 if (*inout_nframes < 1) {
483 *inout_nframes = 0;
484 return 0;
485 }
486
487 /* Index of output frame. */
488 outidx = 0;
489
490 /* Try to read from prespeech buffer */
491 if (fe->vad_data->in_speech && fe_prespch_ncep(fe->vad_data->prespch_buf) > 0) {
492 outidx = fe_copy_from_prespch(fe, inout_nframes, buf_cep, outidx);
493 if ((*inout_nframes) < 1) {
494 /* mfcc buffer is filled from prespeech buffer */
495 *inout_nframes = outidx;
496 return 0;
497 }
498 }
499
500 /* Keep track of the original start of the buffer. */
501 orig_spch = *inout_spch;
502 orig_nsamps = *inout_nsamps;
503 orig_n_overflow = fe->num_overflow_samps;
504
505 /* Start processing, taking care of any incoming overflow. */
506 if (fe->num_overflow_samps > 0) {
507 int offset = fe->frame_size - fe->num_overflow_samps;
508 /* Append start of spch to overflow samples to make a full frame. */
509 memcpy(fe->overflow_samps + fe->num_overflow_samps,
510 *inout_spch, offset * sizeof(**inout_spch));
511 fe_read_frame(fe, fe->overflow_samps, fe->frame_size);
512 /* Update input-output pointers and counters. */
513 *inout_spch += offset;
514 *inout_nsamps -= offset;
515 } else {
516 fe_read_frame(fe, *inout_spch, fe->frame_size);
517 /* Update input-output pointers and counters. */
518 *inout_spch += fe->frame_size;
519 *inout_nsamps -= fe->frame_size;
520 }
521
522 fe_write_frame(fe, buf_cep[outidx], voiced_spch != NULL);
523 outidx = fe_check_prespeech(fe, inout_nframes, buf_cep, outidx, out_frameidx, inout_nsamps, orig_nsamps);
524
525 /* Process all remaining frames. */
526 while (*inout_nframes > 0 && *inout_nsamps >= (size_t)fe->frame_shift) {
527 fe_shift_frame(fe, *inout_spch, fe->frame_shift);
528 fe_write_frame(fe, buf_cep[outidx], voiced_spch != NULL);
529
530 outidx = fe_check_prespeech(fe, inout_nframes, buf_cep, outidx, out_frameidx, inout_nsamps, orig_nsamps);
531
532 /* Update input-output pointers and counters. */
533 *inout_spch += fe->frame_shift;
534 *inout_nsamps -= fe->frame_shift;
535 }
536
537 /* How many relevant overflow samples are there left? */
538 if (fe->num_overflow_samps <= 0) {
539 /* Maximum number of overflow samples past *inout_spch to save. */
540 n_overflow = *inout_nsamps;
541 if (n_overflow > fe->frame_shift)
542 n_overflow = fe->frame_shift;
543 fe->num_overflow_samps = fe->frame_size - fe->frame_shift;
544 /* Make sure this isn't an illegal read! */
545 if (fe->num_overflow_samps > *inout_spch - orig_spch)
546 fe->num_overflow_samps = *inout_spch - orig_spch;
547 fe->num_overflow_samps += n_overflow;
548 if (fe->num_overflow_samps > 0) {
549 memcpy(fe->overflow_samps,
550 *inout_spch - (fe->frame_size - fe->frame_shift),
551 fe->num_overflow_samps * sizeof(**inout_spch));
552 /* Update the input pointer to cover this stuff. */
553 *inout_spch += n_overflow;
554 *inout_nsamps -= n_overflow;
555 }
556 } else {
557 /* There is still some relevant data left in the overflow buffer. */
558 /* Shift existing data to the beginning. */
559 memmove(fe->overflow_samps,
560 fe->overflow_samps + orig_n_overflow - fe->num_overflow_samps,
561 fe->num_overflow_samps * sizeof(*fe->overflow_samps));
562 /* Copy in whatever we had in the original speech buffer. */
563 n_overflow = *inout_spch - orig_spch + *inout_nsamps;
564 if (n_overflow > fe->frame_size - fe->num_overflow_samps)
565 n_overflow = fe->frame_size - fe->num_overflow_samps;
566 memcpy(fe->overflow_samps + fe->num_overflow_samps,
567 orig_spch, n_overflow * sizeof(*orig_spch));
568 fe->num_overflow_samps += n_overflow;
569 /* Advance the input pointers. */
570 if (n_overflow > *inout_spch - orig_spch) {
571 n_overflow -= (*inout_spch - orig_spch);
572 *inout_spch += n_overflow;
573 *inout_nsamps -= n_overflow;
574 }
575 }
576
577 /* Finally update the frame counter with the number of frames
578 * and global sample counter with number of samples we procesed */
579 *inout_nframes = outidx; /* FIXME: Not sure why I wrote it this way... */
580 fe->num_processed_samps += orig_nsamps - *inout_nsamps;
581
582 return 0;
583}
584
585int
586fe_process_utt(fe_t * fe, int16 const * spch, size_t nsamps,
587 mfcc_t *** cep_block, int32 * nframes)
588{
589 mfcc_t **cep;
590 int rv;
591
592 /* Figure out how many frames we will need. */
593 fe_process_frames(fe, NULL, &nsamps, NULL, nframes, NULL);
594 /* Create the output buffer (it has to exist, even if there are no output frames). */
595 if (*nframes)
596 cep = (mfcc_t **)ckd_calloc_2d(*nframes, fe->feature_dimension, sizeof(**cep));
597 else
598 cep = (mfcc_t **)ckd_calloc_2d(1, fe->feature_dimension, sizeof(**cep));
599 /* Now just call fe_process_frames() with the allocated buffer. */
600 rv = fe_process_frames(fe, &spch, &nsamps, cep, nframes, NULL);
601 *cep_block = cep;
602
603 return rv;
604}
605
606
607int32
608fe_end_utt(fe_t * fe, mfcc_t * cepvector, int32 * nframes)
609{
610 /* Process any remaining data, not very accurate for the VAD */
611 *nframes = 0;
612 if (fe->num_overflow_samps > 0) {
613 fe_read_frame(fe, fe->overflow_samps, fe->num_overflow_samps);
614 fe_write_frame(fe, cepvector, FALSE);
615 if (fe->vad_data->in_speech)
616 *nframes = 1;
617 }
618
619 /* reset overflow buffers... */
620 fe->num_overflow_samps = 0;
621
622 return 0;
623}
624
625fe_t *
626fe_retain(fe_t *fe)
627{
628 ++fe->refcount;
629 return fe;
630}
631
632int
633fe_free(fe_t * fe)
634{
635 if (fe == NULL)
636 return 0;
637 if (--fe->refcount > 0)
638 return fe->refcount;
639
640 /* kill FE instance - free everything... */
641 if (fe->mel_fb) {
642 if (fe->mel_fb->mel_cosine)
643 fe_free_2d((void *) fe->mel_fb->mel_cosine);
644 ckd_free(fe->mel_fb->lifter);
645 ckd_free(fe->mel_fb->spec_start);
646 ckd_free(fe->mel_fb->filt_start);
647 ckd_free(fe->mel_fb->filt_width);
648 ckd_free(fe->mel_fb->filt_coeffs);
649 ckd_free(fe->mel_fb);
650 }
651 ckd_free(fe->spch);
652 ckd_free(fe->frame);
653 ckd_free(fe->ccc);
654 ckd_free(fe->sss);
655 ckd_free(fe->spec);
656 ckd_free(fe->mfspec);
657 ckd_free(fe->overflow_samps);
658 ckd_free(fe->hamming_window);
659
660 if (fe->noise_stats)
661 fe_free_noisestats(fe->noise_stats);
662
663 if (fe->vad_data) {
664 fe_prespch_free(fe->vad_data->prespch_buf);
665 ckd_free(fe->vad_data);
666 }
667
668 cmd_ln_free_r(fe->config);
669 ckd_free(fe);
670
671 return 0;
672}
673
677int32
678fe_mfcc_to_float(fe_t * fe,
679 mfcc_t ** input, float32 ** output, int32 nframes)
680{
681 int32 i;
682
683#ifndef FIXED_POINT
684 if ((void *) input == (void *) output)
685 return nframes * fe->feature_dimension;
686#endif
687 for (i = 0; i < nframes * fe->feature_dimension; ++i)
688 output[0][i] = MFCC2FLOAT(input[0][i]);
689
690 return i;
691}
692
696int32
697fe_float_to_mfcc(fe_t * fe,
698 float32 ** input, mfcc_t ** output, int32 nframes)
699{
700 int32 i;
701
702#ifndef FIXED_POINT
703 if ((void *) input == (void *) output)
704 return nframes * fe->feature_dimension;
705#endif
706 for (i = 0; i < nframes * fe->feature_dimension; ++i)
707 output[0][i] = FLOAT2MFCC(input[0][i]);
708
709 return i;
710}
711
712int32
713fe_logspec_to_mfcc(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep)
714{
715#ifdef FIXED_POINT
716 fe_spec2cep(fe, fr_spec, fr_cep);
717#else /* ! FIXED_POINT */
718 powspec_t *powspec;
719 int32 i;
720
721 powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t));
722 for (i = 0; i < fe->mel_fb->num_filters; ++i)
723 powspec[i] = (powspec_t) fr_spec[i];
724 fe_spec2cep(fe, powspec, fr_cep);
725 ckd_free(powspec);
726#endif /* ! FIXED_POINT */
727 return 0;
728}
729
730int32
731fe_logspec_dct2(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep)
732{
733#ifdef FIXED_POINT
734 fe_dct2(fe, fr_spec, fr_cep, 0);
735#else /* ! FIXED_POINT */
736 powspec_t *powspec;
737 int32 i;
738
739 powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t));
740 for (i = 0; i < fe->mel_fb->num_filters; ++i)
741 powspec[i] = (powspec_t) fr_spec[i];
742 fe_dct2(fe, powspec, fr_cep, 0);
743 ckd_free(powspec);
744#endif /* ! FIXED_POINT */
745 return 0;
746}
747
748int32
749fe_mfcc_dct3(fe_t * fe, const mfcc_t * fr_cep, mfcc_t * fr_spec)
750{
751#ifdef FIXED_POINT
752 fe_dct3(fe, fr_cep, fr_spec);
753#else /* ! FIXED_POINT */
754 powspec_t *powspec;
755 int32 i;
756
757 powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t));
758 fe_dct3(fe, fr_cep, powspec);
759 for (i = 0; i < fe->mel_fb->num_filters; ++i)
760 fr_spec[i] = (mfcc_t) powspec[i];
761 ckd_free(powspec);
762#endif /* ! FIXED_POINT */
763 return 0;
764}
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition ckd_alloc.c:244
#define ckd_malloc(sz)
Macro for ckd_malloc
Definition ckd_alloc.h:253
#define ckd_calloc_2d(d1, d2, sz)
Macro for ckd_calloc_2d
Definition ckd_alloc.h:270
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition ckd_alloc.h:248
Command-line and other configuration parsing and handling.
#define cmd_ln_boolean_r(c, n)
Retrieve a boolean value from a command-line object.
Definition cmd_ln.h:334
SPHINXBASE_EXPORT int cmd_ln_free_r(cmd_ln_t *cmdln)
Release a command-line argument set and all associated strings.
Definition cmd_ln.c:1046
SPHINXBASE_EXPORT cmd_ln_t * cmd_ln_get(void)
Retrieve the global cmd_ln_t object used by non-re-entrant functions.
Definition cmd_ln.c:493
SPHINXBASE_EXPORT char const * cmd_ln_str_r(cmd_ln_t *cmdln, char const *name)
Retrieve a string from a command-line object.
Definition cmd_ln.c:949
SPHINXBASE_EXPORT cmd_ln_t * cmd_ln_retain(cmd_ln_t *cmdln)
Retain ownership of a command-line argument set.
Definition cmd_ln.c:1039
Implementation of logging routines.
#define E_ERROR(...)
Print error message to error log.
Definition err.h:104
#define E_INFO(...)
Print logging information to standard error stream.
Definition err.h:114
High performance portable random generator created by Takuji Nishimura and Makoto Matsumoto.
#define s3_rand_seed(s)
Macros to simplify calling of random generator function.
Definition genrand.h:144
Basic type definitions used in Sphinx.
Argument definition structure.
Opaque structure used to hold the results of command-line parsing.
Structure for the front-end computation.
Base Struct to hold all structure for MFCC computation.
Definition fe_internal.h:75