Asterisk - The Open Source Telephony Project  21.4.1
res_speech.c
Go to the documentation of this file.
1 /*
2  * Asterisk -- An open source telephony toolkit.
3  *
4  * Copyright (C) 2006, Digium, Inc.
5  *
6  * Joshua Colp <jcolp@digium.com>
7  *
8  * See http://www.asterisk.org for more information about
9  * the Asterisk project. Please do not directly contact
10  * any of the maintainers of this project for assistance;
11  * the project provides a web site, mailing lists and IRC
12  * channels for your use.
13  *
14  * This program is free software, distributed under the terms of
15  * the GNU General Public License Version 2. See the LICENSE file
16  * at the top of the source tree.
17  */
18 
19 /*! \file
20  *
21  * \brief Generic Speech Recognition API
22  *
23  * \author Joshua Colp <jcolp@digium.com>
24  */
25 
26 /*** MODULEINFO
27  <support_level>core</support_level>
28  ***/
29 
30 #include "asterisk.h"
31 
32 #include "asterisk/channel.h"
33 #include "asterisk/module.h"
34 #include "asterisk/lock.h"
35 #include "asterisk/linkedlists.h"
36 #include "asterisk/cli.h"
37 #include "asterisk/term.h"
38 #include "asterisk/speech.h"
39 #include "asterisk/format_cache.h"
40 #include "asterisk/translate.h"
41 
/*! \brief Registered speech recognition engines; walk or modify only with the list lock held */
static AST_RWLIST_HEAD_STATIC(engines, ast_speech_engine);

/*! \brief Engine returned when a lookup gives no name; NULL until an engine is registered */
static struct ast_speech_engine *default_engine = NULL;
44 
45 /*! \brief Find a speech recognition engine of specified name, if NULL then use the default one */
46 struct ast_speech_engine *ast_speech_find_engine(const char *engine_name)
47 {
48  struct ast_speech_engine *engine = NULL;
49 
50  /* If no name is specified -- use the default engine */
51  if (ast_strlen_zero(engine_name))
52  return default_engine;
53 
55  AST_RWLIST_TRAVERSE(&engines, engine, list) {
56  if (!strcasecmp(engine->name, engine_name)) {
57  break;
58  }
59  }
61 
62  return engine;
63 }
64 
65 /*! \brief Activate a loaded (either local or global) grammar */
66 int ast_speech_grammar_activate(struct ast_speech *speech, const char *grammar_name)
67 {
68  return (speech->engine->activate ? speech->engine->activate(speech, grammar_name) : -1);
69 }
70 
71 /*! \brief Deactivate a loaded grammar on a speech structure */
72 int ast_speech_grammar_deactivate(struct ast_speech *speech, const char *grammar_name)
73 {
74  return (speech->engine->deactivate ? speech->engine->deactivate(speech, grammar_name) : -1);
75 }
76 
77 /*! \brief Load a local grammar on a speech structure */
78 int ast_speech_grammar_load(struct ast_speech *speech, const char *grammar_name, const char *grammar)
79 {
80  return (speech->engine->load ? speech->engine->load(speech, grammar_name, grammar) : -1);
81 }
82 
83 /*! \brief Unload a local grammar from a speech structure */
84 int ast_speech_grammar_unload(struct ast_speech *speech, const char *grammar_name)
85 {
86  return (speech->engine->unload ? speech->engine->unload(speech, grammar_name) : -1);
87 }
88 
89 /*! \brief Return the results of a recognition from the speech structure */
91 {
92  return (speech->engine->get ? speech->engine->get(speech) : NULL);
93 }
94 
95 /*! \brief Free a list of results */
97 {
98  struct ast_speech_result *current_result = result, *prev_result = NULL;
99  int res = 0;
100 
101  while (current_result != NULL) {
102  prev_result = current_result;
103  /* Deallocate what we can */
104  if (current_result->text != NULL) {
105  ast_free(current_result->text);
106  current_result->text = NULL;
107  }
108  if (current_result->grammar != NULL) {
109  ast_free(current_result->grammar);
110  current_result->grammar = NULL;
111  }
112  /* Move on and then free ourselves */
113  current_result = AST_LIST_NEXT(current_result, list);
114  ast_free(prev_result);
115  prev_result = NULL;
116  }
117 
118  return res;
119 }
120 
121 /*! \brief Start speech recognition on a speech structure */
122 void ast_speech_start(struct ast_speech *speech)
123 {
124 
125  /* Clear any flags that may affect things */
126  ast_clear_flag(speech, AST_SPEECH_SPOKE);
127  ast_clear_flag(speech, AST_SPEECH_QUIET);
128  ast_clear_flag(speech, AST_SPEECH_HAVE_RESULTS);
129 
130  /* If results are on the structure, free them since we are starting again */
131  if (speech->results) {
133  speech->results = NULL;
134  }
135 
136  /* If the engine needs to start stuff up, do it */
137  if (speech->engine->start)
138  speech->engine->start(speech);
139 
140  return;
141 }
142 
143 /*! \brief Write in signed linear audio to be recognized */
144 int ast_speech_write(struct ast_speech *speech, void *data, int len)
145 {
146  /* Make sure the speech engine is ready to accept audio */
147  if (speech->state != AST_SPEECH_STATE_READY)
148  return -1;
149 
150  return speech->engine->write(speech, data, len);
151 }
152 
153 /*! \brief Signal to the engine that DTMF was received */
154 int ast_speech_dtmf(struct ast_speech *speech, const char *dtmf)
155 {
156  int res = 0;
157 
158  if (speech->state != AST_SPEECH_STATE_READY)
159  return -1;
160 
161  if (speech->engine->dtmf != NULL) {
162  res = speech->engine->dtmf(speech, dtmf);
163  }
164 
165  return res;
166 }
167 
168 /*! \brief Change an engine specific attribute */
169 int ast_speech_change(struct ast_speech *speech, const char *name, const char *value)
170 {
171  return (speech->engine->change ? speech->engine->change(speech, name, value) : -1);
172 }
173 
174 /*! \brief Get an engine specific attribute */
175 int ast_speech_get_setting(struct ast_speech *speech, const char *name, char *buf, size_t len)
176 {
177  return (speech->engine->get_setting ? speech->engine->get_setting(speech, name, buf, len) : -1);
178 }
179 
180 /*! \brief Create a new speech structure using the engine specified */
181 struct ast_speech *ast_speech_new(const char *engine_name, const struct ast_format_cap *cap)
182 {
183  struct ast_speech_engine *engine = NULL;
184  struct ast_speech *new_speech = NULL;
185  struct ast_format_cap *joint;
186  RAII_VAR(struct ast_format *, best, NULL, ao2_cleanup);
187  RAII_VAR(struct ast_format *, best_translated, NULL, ao2_cleanup);
188 
189  /* Try to find the speech recognition engine that was requested */
190  if (!(engine = ast_speech_find_engine(engine_name)))
191  return NULL;
192 
194  if (!joint) {
195  return NULL;
196  }
197 
198  ast_format_cap_get_compatible(engine->formats, cap, joint);
199  best = ast_format_cap_get_format(joint, 0);
200  ao2_ref(joint, -1);
201 
202  if (!best) {
204  best = ao2_bump(ast_format_slin);
205  } else {
206  /*
207  * If there is no overlap and the engine does not support slin, find the best
208  * format to translate to and set that as the 'best' input format for the engine.
209  * API consumer is responsible for translating to this format.
210  * Safe to cast cap as ast_translator_best_choice does not modify the caps
211  */
212  if (ast_translator_best_choice(engine->formats, (struct ast_format_cap *)cap, &best, &best_translated)) {
213  /* No overlapping formats and no translatable formats */
214  return NULL;
215  }
216  }
217  }
218 
219  /* Allocate our own speech structure, and try to allocate a structure from the engine too */
220  if (!(new_speech = ast_calloc(1, sizeof(*new_speech)))) {
221  return NULL;
222  }
223 
224  /* Initialize the lock */
225  ast_mutex_init(&new_speech->lock);
226 
227  /* Make sure no results are present */
228  new_speech->results = NULL;
229 
230  /* Copy over our engine pointer */
231  new_speech->engine = engine;
232 
233  /* Can't forget the format audio is going to be in */
234  new_speech->format = ao2_bump(best);
235 
236  /* We are not ready to accept audio yet */
237  ast_speech_change_state(new_speech, AST_SPEECH_STATE_NOT_READY);
238 
239  /* Pass ourselves to the engine so they can set us up some more and if they error out then do not create a structure */
240  if (engine->create(new_speech, new_speech->format)) {
241  ast_mutex_destroy(&new_speech->lock);
242  ao2_ref(new_speech->format, -1);
243  ast_free(new_speech);
244  return NULL;
245  }
246 
247  return new_speech;
248 }
249 
250 /*! \brief Destroy a speech structure */
251 int ast_speech_destroy(struct ast_speech *speech)
252 {
253  int res = 0;
254 
255  /* Call our engine so we are destroyed properly */
256  speech->engine->destroy(speech);
257 
258  /* Deinitialize the lock */
259  ast_mutex_destroy(&speech->lock);
260 
261  /* If results exist on the speech structure, destroy them */
262  if (speech->results)
264 
265  /* If a processing sound is set - free the memory used by it */
266  if (speech->processing_sound)
267  ast_free(speech->processing_sound);
268 
269  ao2_ref(speech->format, -1);
270 
271  /* Aloha we are done */
272  ast_free(speech);
273 
274  return res;
275 }
276 
277 /*! \brief Change state of a speech structure */
278 int ast_speech_change_state(struct ast_speech *speech, int state)
279 {
280  int res = 0;
281 
282  switch (state) {
283  case AST_SPEECH_STATE_WAIT:
284  /* The engine heard audio, so they spoke */
285  ast_set_flag(speech, AST_SPEECH_SPOKE);
286  default:
287  speech->state = state;
288  break;
289  }
290 
291  return res;
292 }
293 
294 const char *ast_speech_results_type_to_string(enum ast_speech_results_type type)
295 {
296  switch (type) {
297  case AST_SPEECH_RESULTS_TYPE_NORMAL:
298  return "normal";
299  case AST_SPEECH_RESULTS_TYPE_NBEST:
300  return "nbest";
301  default:
302  ast_assert(0);
303  return "unknown";
304  }
305 }
306 
307 /*! \brief Change the type of results we want */
308 int ast_speech_change_results_type(struct ast_speech *speech, enum ast_speech_results_type results_type)
309 {
310  speech->results_type = results_type;
311 
312  return (speech->engine->change_results_type ? speech->engine->change_results_type(speech, results_type) : 0);
313 }
314 
315 /*! \brief Register a speech recognition engine */
317 {
318  int res = 0;
319 
320  /* Confirm the engine meets the minimum API requirements */
321  if (!engine->create || !engine->write || !engine->destroy) {
322  ast_log(LOG_WARNING, "Speech recognition engine '%s' did not meet minimum API requirements.\n", engine->name);
323  return -1;
324  }
325 
326  /* If an engine is already loaded with this name, error out */
327  if (ast_speech_find_engine(engine->name)) {
328  ast_log(LOG_WARNING, "Speech recognition engine '%s' already exists.\n", engine->name);
329  return -1;
330  }
331 
332  ast_verb(5, "Registered speech recognition engine '%s'\n", engine->name);
333 
334  /* Add to the engine linked list and make default if needed */
336  AST_RWLIST_INSERT_HEAD(&engines, engine, list);
337  if (!default_engine) {
338  default_engine = engine;
339  ast_verb(5, "Made '%s' the default speech recognition engine\n", engine->name);
340  }
342 
343  return res;
344 }
345 
/*!
 * \brief Unregister a speech recognition engine
 *
 * \return 0 when an engine with that name was removed, -1 otherwise.
 */
int ast_speech_unregister(const char *engine_name)
{
	if (ast_speech_unregister2(engine_name) != NULL) {
		return 0;
	}

	return -1;
}
351 
352 struct ast_speech_engine *ast_speech_unregister2(const char *engine_name)
353 {
354  struct ast_speech_engine *engine = NULL;
355 
356  if (ast_strlen_zero(engine_name)) {
357  return NULL;
358  }
359 
361  AST_RWLIST_TRAVERSE_SAFE_BEGIN(&engines, engine, list) {
362  if (!strcasecmp(engine->name, engine_name)) {
363  /* We have our engine... removed it */
364  AST_RWLIST_REMOVE_CURRENT(list);
365  /* If this was the default engine, we need to pick a new one */
366  if (engine == default_engine) {
367  default_engine = AST_RWLIST_FIRST(&engines);
368  }
369  ast_verb(5, "Unregistered speech recognition engine '%s'\n", engine_name);
370  /* All went well */
371  break;
372  }
373  }
374  AST_RWLIST_TRAVERSE_SAFE_END;
376 
377  return engine;
378 }
379 
381  int (*should_unregister)(const struct ast_speech_engine *engine, void *data), void *data,
382  void (*on_unregistered)(void *obj))
383 {
384  struct ast_speech_engine *engine = NULL;
385 
386  if (!should_unregister) {
387  return;
388  }
389 
391  AST_RWLIST_TRAVERSE_SAFE_BEGIN(&engines, engine, list) {
392  if (should_unregister(engine, data)) {
393  /* We have our engine... removed it */
394  AST_RWLIST_REMOVE_CURRENT(list);
395  /* If this was the default engine, we need to pick a new one */
396  if (engine == default_engine) {
397  default_engine = AST_RWLIST_FIRST(&engines);
398  }
399  ast_verb(5, "Unregistered speech recognition engine '%s'\n", engine->name);
400  /* All went well */
401  if (on_unregistered) {
402  on_unregistered(engine);
403  }
404  }
405  }
406  AST_RWLIST_TRAVERSE_SAFE_END;
408 }
409 
/*! \brief Module unload hook; always returns -1 because this module can not be unloaded */
static int unload_module(void)
{
	const int refuse_unload = -1;

	return refuse_unload;
}
415 
416 static int load_module(void)
417 {
419 }
420 
421 AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_GLOBAL_SYMBOLS | AST_MODFLAG_LOAD_ORDER, "Generic Speech Recognition API",
422  .support_level = AST_MODULE_SUPPORT_CORE,
423  .load = load_module,
424  .unload = unload_module,
425  .load_pri = AST_MODPRI_APP_DEPEND - 1,
426 );
int ast_speech_change_state(struct ast_speech *speech, int state)
Change state of a speech structure.
Definition: res_speech.c:278
int state
Definition: speech.h:62
struct ast_speech_result * ast_speech_results_get(struct ast_speech *speech)
Return the results of a recognition from the speech structure.
Definition: res_speech.c:90
Asterisk locking-related definitions:
Asterisk main include file. File version handling, generic pbx functions.
int ast_speech_get_setting(struct ast_speech *speech, const char *name, char *buf, size_t len)
Get an engine specific attribute.
Definition: res_speech.c:175
int ast_speech_write(struct ast_speech *speech, void *data, int len)
Write in signed linear audio to be recognized.
Definition: res_speech.c:144
struct ast_speech_engine * ast_speech_unregister2(const char *engine_name)
Unregister a speech recognition engine.
Definition: res_speech.c:352
int(* destroy)(struct ast_speech *speech)
Definition: speech.h:82
Generic Speech Recognition API.
int(* create)(struct ast_speech *speech, struct ast_format *format)
Definition: speech.h:80
const char * ast_speech_results_type_to_string(enum ast_speech_results_type type)
Convert a speech results type to a string.
Definition: res_speech.c:294
int(* change)(struct ast_speech *speech, const char *name, const char *value)
Definition: speech.h:98
Support for translation of data formats. translate.c.
struct ast_speech_engine * ast_speech_find_engine(const char *engine_name)
Find a speech recognition engine of specified name, if NULL then use the default one.
Definition: res_speech.c:46
int ast_speech_grammar_activate(struct ast_speech *speech, const char *grammar_name)
Activate a loaded (either local or global) grammar.
Definition: res_speech.c:66
#define AST_RWLIST_RDLOCK(head)
Read locks a list.
Definition: linkedlists.h:78
int ast_speech_change_results_type(struct ast_speech *speech, enum ast_speech_results_type results_type)
Change the type of results we want.
Definition: res_speech.c:308
void ast_speech_unregister_engines(int(*should_unregister)(const struct ast_speech_engine *engine, void *data), void *data, void(*on_unregistered)(void *obj))
Unregister all speech recognition engines told to by callback.
Definition: res_speech.c:380
int ast_speech_register(struct ast_speech_engine *engine)
Register a speech recognition engine.
Definition: res_speech.c:316
enum ast_speech_results_type results_type
Definition: speech.h:70
#define AST_LIST_NEXT(elm, field)
Returns the next entry in the list after the given entry.
Definition: linkedlists.h:439
#define AST_RWLIST_WRLOCK(head)
Write locks a list.
Definition: linkedlists.h:52
Definition of a media format.
Definition: format.c:43
void ast_speech_start(struct ast_speech *speech)
Start speech recognition on a speech structure.
Definition: res_speech.c:122
int ast_speech_unregister(const char *engine_name)
Unregister a speech recognition engine.
Definition: res_speech.c:347
char * name
Definition: speech.h:78
int(* start)(struct ast_speech *speech)
Definition: speech.h:96
char * grammar
Definition: speech.h:119
int ast_translator_best_choice(struct ast_format_cap *dst_cap, struct ast_format_cap *src_cap, struct ast_format **dst_fmt_out, struct ast_format **src_fmt_out)
Chooses the best translation path.
Definition: translate.c:1402
int ast_speech_grammar_unload(struct ast_speech *speech, const char *grammar_name)
Unload a local grammar from a speech structure.
Definition: res_speech.c:84
enum ast_format_cmp_res ast_format_cap_iscompatible_format(const struct ast_format_cap *cap, const struct ast_format *format)
Find if ast_format is within the capabilities of the ast_format_cap object.
Definition: format_cap.c:581
#define AST_RWLIST_HEAD_STATIC(name, type)
Defines a structure to be used to hold a read/write list of specified type, statically initialized...
Definition: linkedlists.h:333
#define ao2_bump(obj)
Bump refcount on an AO2 object by one, returning the object.
Definition: astobj2.h:480
int(* unload)(struct ast_speech *speech, const char *grammar_name)
Definition: speech.h:86
General Asterisk PBX channel definitions.
#define ao2_ref(o, delta)
Reference/unreference an object and return the old refcount.
Definition: astobj2.h:459
int ast_speech_grammar_load(struct ast_speech *speech, const char *grammar_name, const char *grammar)
Load a local grammar on a speech structure.
Definition: res_speech.c:78
int ast_speech_change(struct ast_speech *speech, const char *name, const char *value)
Change an engine specific attribute.
Definition: res_speech.c:169
struct ast_format_cap * formats
Definition: speech.h:106
A set of macros to manage forward-linked lists.
int(* change_results_type)(struct ast_speech *speech, enum ast_speech_results_type results_type)
Definition: speech.h:102
struct ast_speech_engine * engine
Definition: speech.h:72
struct ast_speech_result * results
Definition: speech.h:68
#define ast_format_cap_alloc(flags)
Allocate a new ast_format_cap structure.
Definition: format_cap.h:49
struct ast_speech_result *(* get)(struct ast_speech *speech)
Definition: speech.h:104
int(* activate)(struct ast_speech *speech, const char *grammar_name)
Definition: speech.h:88
Format capabilities structure, holds formats + preference order + etc.
Definition: format_cap.c:54
int(* write)(struct ast_speech *speech, void *data, int len)
Definition: speech.h:92
int ast_speech_results_free(struct ast_speech_result *result)
Free a list of results.
Definition: res_speech.c:96
#define ast_calloc(num, len)
A wrapper for calloc()
Definition: astmm.h:202
struct ast_format * format
Definition: speech.h:64
ast_mutex_t lock
Definition: speech.h:56
Standard Command Line Interface.
int(* deactivate)(struct ast_speech *speech, const char *grammar_name)
Definition: speech.h:90
int ast_speech_dtmf(struct ast_speech *speech, const char *dtmf)
Signal to the engine that DTMF was received.
Definition: res_speech.c:154
int(* load)(struct ast_speech *speech, const char *grammar_name, const char *grammar)
Definition: speech.h:84
int(* dtmf)(struct ast_speech *speech, const char *dtmf)
Definition: speech.h:94
char * processing_sound
Definition: speech.h:60
struct ast_speech * ast_speech_new(const char *engine_name, const struct ast_format_cap *cap)
Create a new speech structure using the engine specified.
Definition: res_speech.c:181
Handy terminal functions for vt* terms.
#define AST_RWLIST_UNLOCK(head)
Attempts to unlock a read/write based list.
Definition: linkedlists.h:151
struct ast_format * ast_format_cap_get_format(const struct ast_format_cap *cap, int position)
Get the format at a specific index.
Definition: format_cap.c:400
int ast_speech_destroy(struct ast_speech *speech)
Destroy a speech structure.
Definition: res_speech.c:251
#define ASTERISK_GPL_KEY
The text the key() function should return.
Definition: module.h:46
struct ast_format * ast_format_slin
Built-in cached signed linear 8kHz format.
Definition: format_cache.c:41
Asterisk module definitions.
#define RAII_VAR(vartype, varname, initval, dtor)
Declare a variable that will call a destructor function when it goes out of scope.
Definition: utils.h:941
int ast_format_cap_get_compatible(const struct ast_format_cap *cap1, const struct ast_format_cap *cap2, struct ast_format_cap *result)
Find the compatible formats between two capabilities structures.
Definition: format_cap.c:628
int ast_speech_grammar_deactivate(struct ast_speech *speech, const char *grammar_name)
Deactivate a loaded grammar on a speech structure.
Definition: res_speech.c:72
int(* get_setting)(struct ast_speech *speech, const char *name, char *buf, size_t len)
Definition: speech.h:100
Media Format Cache API.