Asterisk - The Open Source Telephony Project  21.4.1
func_talkdetect.c
Go to the documentation of this file.
1 /*
2  * Asterisk -- An open source telephony toolkit.
3  *
4  * Copyright (C) 2014, Digium, Inc.
5  *
6  * Matt Jordan <mjordan@digium.com>
7  *
8  * See http://www.asterisk.org for more information about
9  * the Asterisk project. Please do not directly contact
10  * any of the maintainers of this project for assistance;
11  * the project provides a web site, mailing lists and IRC
12  * channels for your use.
13  *
14  * This program is free software, distributed under the terms of
15  * the GNU General Public License Version 2. See the LICENSE file
16  * at the top of the source tree.
17  */
18 
19 /*! \file
20  *
21  * \brief Function that raises events when talking is detected on a channel
22  *
23  * \author Matt Jordan <mjordan@digium.com>
24  *
25  * \ingroup functions
26  */
27 
28 /*** MODULEINFO
29  <support_level>core</support_level>
30  ***/
31 
32 #include "asterisk.h"
33 
34 #include "asterisk/module.h"
35 #include "asterisk/channel.h"
36 #include "asterisk/pbx.h"
37 #include "asterisk/app.h"
38 #include "asterisk/dsp.h"
39 #include "asterisk/audiohook.h"
40 #include "asterisk/stasis.h"
41 #include "asterisk/stasis_channels.h"
42 
43 /*** DOCUMENTATION
44  <function name="TALK_DETECT" language="en_US">
45  <since>
46  <version>12.4.0</version>
47  </since>
48  <synopsis>
49  Raises notifications when Asterisk detects silence or talking on a channel.
50  </synopsis>
51  <syntax>
52  <parameter name="action" required="true">
53  <optionlist>
54  <option name="remove">
55  <para>W/O. Remove talk detection from the channel.</para>
56  </option>
57  <option name="set">
58  <para>W/O. Enable TALK_DETECT and/or configure talk detection
59  parameters. Can be called multiple times to change parameters
60  on a channel with talk detection already enabled.</para>
61  <argument name="dsp_silence_threshold" required="false">
62  <para>The time in milliseconds of sound falling below the
63  <replaceable>dsp_talking_threshold</replaceable> option when
64  a user is considered to stop talking. The default value is
65  2500.</para>
66  </argument>
67  <argument name="dsp_talking_threshold" required="false">
68  <para>The minimum average magnitude per sample in a frame
69  for the DSP to consider talking/noise present. A value below
70  this level is considered silence. If not specified, the
71  value comes from the <filename>dsp.conf</filename>
72  <replaceable>silencethreshold</replaceable> option or 256
73  if <filename>dsp.conf</filename> doesn't exist or the
74  <replaceable>silencethreshold</replaceable> option is not
75  set.</para>
76  </argument>
77  </option>
78  </optionlist>
79  </parameter>
80  </syntax>
81  <description>
82  <para>The TALK_DETECT function enables events on the channel
83  it is applied to. These events can be emitted over AMI, ARI, and
84  potentially other Asterisk modules that listen for the internal
85  notification.</para>
86  <para>The function has two parameters that can optionally be passed
87  when <literal>set</literal> on a channel: <replaceable>dsp_talking_threshold</replaceable>
88  and <replaceable>dsp_silence_threshold</replaceable>.</para>
89  <para><replaceable>dsp_talking_threshold</replaceable> is the minimum average magnitude
90  per sample in a frame above which the dsp considers a user to be
91  talking; anything below it is treated as silence. By default, the value of
92  <replaceable>silencethreshold</replaceable> from <filename>dsp.conf</filename>
93  is used. If this value is set too low, events may be
94  falsely triggered by variations in room noise.</para>
95  <para>Valid values are 1 through 2^31.</para>
96  <para><replaceable>dsp_silence_threshold</replaceable> is the time in milliseconds of sound
97  falling within what the dsp has established as baseline silence before
98  a user is considered to be silent. If this value is set too low events
99  indicating the user has stopped talking may get falsely sent out when
100  the user briefly pauses during mid sentence.</para>
101  <para>The best way to approach this option is to set it slightly above
102  the maximum amount of ms of silence a user may generate during
103  natural speech.</para>
104  <para>By default this value is 2500ms. Valid values are 1
105  through 2^31.</para>
106  <example title="Enable talk detection">
107  same => n,Set(TALK_DETECT(set)=)
108  </example>
109  <example title="Update existing talk detection's silence threshold to 1200 ms">
110  same => n,Set(TALK_DETECT(set)=1200)
111  </example>
112  <example title="Remove talk detection">
113  same => n,Set(TALK_DETECT(remove)=)
114  </example>
115  <example title="Enable and set talk threshold to 128">
116  same => n,Set(TALK_DETECT(set)=,128)
117  </example>
118  <para>This function will set the following variables:</para>
119  <note>
120  <para>The TALK_DETECT function uses an audiohook to inspect the
121  voice media frames on a channel. Other functions, such as JITTERBUFFER,
122  DENOISE, and AGC use a similar mechanism. Audiohooks are processed
123  in the order in which they are placed on the channel. As such,
124  it typically makes sense to place functions that modify the voice
125  media data prior to placing the TALK_DETECT function, as this will
126  yield better results.</para>
127  </note>
128  <example title="Denoise and then perform talk detection">
129  same => n,Set(DENOISE(rx)=on) ; Denoise received audio
130  same => n,Set(TALK_DETECT(set)=) ; Perform talk detection on the denoised received audio
131  </example>
132  </description>
133  </function>
134  ***/
135 
/*! \brief Default dsp_silence_threshold, in milliseconds, used by "set" when the caller supplies none */
136 #define DEFAULT_SILENCE_THRESHOLD 2500
137 
138 /*! \brief Private data structure used with the function's datastore */
140  /*! The audiohook for the function */
142  /*! Our threshold above which we consider someone talking */
144  /*! How long we'll wait before we decide someone is silent */
146  /*! Whether or not the user is currently talking */
147  int talking;
148  /*! The time the current burst of talking started */
149  struct timeval talking_start;
150  /*! The DSP used to do the heavy lifting */
151  struct ast_dsp *dsp;
152 };
153 
154 /*! \internal \brief Destroy the datastore */
155 static void datastore_destroy_cb(void *data) {
156  struct talk_detect_params *td_params = data;
157 
158  ast_audiohook_destroy(&td_params->audiohook);
159 
160  if (td_params->dsp) {
161  ast_dsp_free(td_params->dsp);
162  }
163  ast_free(data);
164 }
165 
166 /*! \brief The channel datastore the function uses to store state */
168  .type = "talk_detect",
169  .destroy = datastore_destroy_cb
170 };
171 
172 /*! \internal \brief An audiohook modification callback
173  *
174  * This processes the read side of a channel's voice data to see if
175  * they are talking
176  *
177  * \note We don't actually modify the audio, so this function always
178  * returns a 'failure' indicating that it didn't modify the data
179  */
180 static int talk_detect_audiohook_cb(struct ast_audiohook *audiohook, struct ast_channel *chan, struct ast_frame *frame, enum ast_audiohook_direction direction)
181 {
182  int total_silence;
183  int is_talking;
184  int update_talking = 0;
185  struct ast_datastore *datastore;
186  struct talk_detect_params *td_params;
187  struct stasis_message *message;
188 
189  if (audiohook->status == AST_AUDIOHOOK_STATUS_DONE) {
190  return 1;
191  }
192 
193  if (direction != AST_AUDIOHOOK_DIRECTION_READ) {
194  return 1;
195  }
196 
197  if (frame->frametype != AST_FRAME_VOICE) {
198  return 1;
199  }
200 
201  if (!(datastore = ast_channel_datastore_find(chan, &talk_detect_datastore, NULL))) {
202  return 1;
203  }
204  td_params = datastore->data;
205 
206  is_talking = !ast_dsp_silence(td_params->dsp, frame, &total_silence);
207  if (is_talking) {
208  if (!td_params->talking) {
209  update_talking = 1;
210  td_params->talking_start = ast_tvnow();
211  }
212  td_params->talking = 1;
213  } else if (total_silence >= td_params->dsp_silence_threshold) {
214  if (td_params->talking) {
215  update_talking = 1;
216  }
217  td_params->talking = 0;
218  }
219 
220  if (update_talking) {
221  struct ast_json *blob = NULL;
222 
223  if (!td_params->talking) {
224  int64_t diff_ms = ast_tvdiff_ms(ast_tvnow(), td_params->talking_start);
225  diff_ms -= td_params->dsp_silence_threshold;
226 
227  blob = ast_json_pack("{s: I}", "duration", (ast_json_int_t)diff_ms);
228  if (!blob) {
229  return 1;
230  }
231  }
232 
233  ast_verb(4, "%s is now %s\n", ast_channel_name(chan),
234  td_params->talking ? "talking" : "silent");
235  message = ast_channel_blob_create_from_cache(ast_channel_uniqueid(chan),
237  blob);
238  if (message) {
239  stasis_publish(ast_channel_topic(chan), message);
240  ao2_ref(message, -1);
241  }
242 
243  ast_json_unref(blob);
244  }
245 
246  return 1;
247 }
248 
249 /*! \internal \brief Disable talk detection on the channel */
250 static int remove_talk_detect(struct ast_channel *chan)
251 {
252  struct ast_datastore *datastore = NULL;
253  struct talk_detect_params *td_params;
254  SCOPED_CHANNELLOCK(chan_lock, chan);
255 
256  datastore = ast_channel_datastore_find(chan, &talk_detect_datastore, NULL);
257  if (!datastore) {
258  ast_log(AST_LOG_WARNING, "Cannot remove TALK_DETECT from %s: TALK_DETECT not currently enabled\n",
259  ast_channel_name(chan));
260  return -1;
261  }
262  td_params = datastore->data;
263 
264  if (ast_audiohook_remove(chan, &td_params->audiohook)) {
265  ast_log(AST_LOG_WARNING, "Failed to remove TALK_DETECT audiohook from channel %s\n",
266  ast_channel_name(chan));
267  return -1;
268  }
269 
270  if (ast_channel_datastore_remove(chan, datastore)) {
271  ast_log(AST_LOG_WARNING, "Failed to remove TALK_DETECT datastore from channel %s\n",
272  ast_channel_name(chan));
273  return -1;
274  }
275  ast_datastore_free(datastore);
276 
277  return 0;
278 }
279 
280 /*! \internal \brief Enable talk detection on the channel */
281 static int set_talk_detect(struct ast_channel *chan, int dsp_silence_threshold, int dsp_talking_threshold)
282 {
283  struct ast_datastore *datastore = NULL;
284  struct talk_detect_params *td_params;
285  SCOPED_CHANNELLOCK(chan_lock, chan);
286 
287  datastore = ast_channel_datastore_find(chan, &talk_detect_datastore, NULL);
288  if (!datastore) {
289  datastore = ast_datastore_alloc(&talk_detect_datastore, NULL);
290  if (!datastore) {
291  return -1;
292  }
293 
294  td_params = ast_calloc(1, sizeof(*td_params));
295  if (!td_params) {
296  ast_datastore_free(datastore);
297  return -1;
298  }
299 
300  ast_audiohook_init(&td_params->audiohook,
302  "TALK_DETECT",
304  td_params->audiohook.manipulate_callback = talk_detect_audiohook_cb;
305  ast_set_flag(&td_params->audiohook, AST_AUDIOHOOK_TRIGGER_READ);
306 
307  td_params->dsp = ast_dsp_new_with_rate(ast_format_get_sample_rate(ast_channel_rawreadformat(chan)));
308  if (!td_params->dsp) {
309  ast_datastore_free(datastore);
310  ast_free(td_params);
311  return -1;
312  }
313  datastore->data = td_params;
314 
315  ast_channel_datastore_add(chan, datastore);
316  ast_audiohook_attach(chan, &td_params->audiohook);
317  } else {
318  /* Talk detection already enabled; update existing settings */
319  td_params = datastore->data;
320  }
321 
324 
325  ast_dsp_set_threshold(td_params->dsp, td_params->dsp_talking_threshold);
326 
327  return 0;
328 }
329 
330 /*! \internal \brief TALK_DETECT write function callback */
331 static int talk_detect_fn_write(struct ast_channel *chan, const char *function, char *data, const char *value)
332 {
333  int res;
334 
335  if (!chan) {
336  return -1;
337  }
338 
339  if (ast_strlen_zero(data)) {
340  ast_log(AST_LOG_WARNING, "TALK_DETECT requires an argument\n");
341  return -1;
342  }
343 
344  if (!strcasecmp(data, "set")) {
345  int dsp_silence_threshold = DEFAULT_SILENCE_THRESHOLD;
346  int dsp_talking_threshold = ast_dsp_get_threshold_from_settings(THRESHOLD_SILENCE);
347 
348  if (!ast_strlen_zero(value)) {
349  char *parse = ast_strdupa(value);
350 
352  AST_APP_ARG(silence_threshold);
353  AST_APP_ARG(talking_threshold);
354  );
355 
356  AST_STANDARD_APP_ARGS(args, parse);
357 
358  if (!ast_strlen_zero(args.silence_threshold)) {
359  if (sscanf(args.silence_threshold, "%30d", &dsp_silence_threshold) != 1) {
360  ast_log(AST_LOG_WARNING, "Failed to parse %s for dsp_silence_threshold\n",
361  args.silence_threshold);
362  return -1;
363  }
364 
365  if (dsp_silence_threshold < 1) {
366  ast_log(AST_LOG_WARNING, "Invalid value %d for dsp_silence_threshold\n",
367  dsp_silence_threshold);
368  return -1;
369  }
370  }
371 
372  if (!ast_strlen_zero(args.talking_threshold)) {
373  if (sscanf(args.talking_threshold, "%30d", &dsp_talking_threshold) != 1) {
374  ast_log(AST_LOG_WARNING, "Failed to parse %s for dsp_talking_threshold\n",
375  args.talking_threshold);
376  return -1;
377  }
378 
379  if (dsp_talking_threshold < 1) {
380  ast_log(AST_LOG_WARNING, "Invalid value %d for dsp_talking_threshold\n",
381  dsp_talking_threshold);
382  return -1;
383  }
384  }
385  }
386 
387  res = set_talk_detect(chan, dsp_silence_threshold, dsp_talking_threshold);
388  } else if (!strcasecmp(data, "remove")) {
389  res = remove_talk_detect(chan);
390  } else {
391  ast_log(AST_LOG_WARNING, "TALK_DETECT: unknown option %s\n", data);
392  res = -1;
393  }
394 
395  return res;
396 }
397 
398 /*! \brief Definition of the TALK_DETECT function */
400  .name = "TALK_DETECT",
401  .write = talk_detect_fn_write,
402 };
403 
404 /*! \internal \brief Unload the module */
405 static int unload_module(void)
406 {
407  int res = 0;
408 
409  res |= ast_custom_function_unregister(&talk_detect_function);
410 
411  return res;
412 }
413 
414 /*! \internal \brief Load the module */
415 static int load_module(void)
416 {
417  int res = 0;
418 
419  res |= ast_custom_function_register(&talk_detect_function);
420 
422 }
423 
/* Module declaration: GPL license key plus the human-readable module
 * description. NOTE(review): presumably this macro is what ties
 * load_module()/unload_module() above to the module loader -- confirm
 * against asterisk/module.h. */
424 AST_MODULE_INFO_STANDARD(ASTERISK_GPL_KEY, "Talk detection dialplan function");
const char * name
Definition: pbx.h:119
const char * type
Definition: datastore.h:32
struct stasis_message_type * ast_channel_talking_stop(void)
Message type for a channel stopping talking.
Main Channel structure associated with a channel.
struct ast_dsp * dsp
Asterisk main include file. File version handling, generic pbx functions.
struct ast_json * ast_json_pack(char const *format,...)
Helper for creating complex JSON values.
Definition: json.c:612
void ast_json_unref(struct ast_json *value)
Decrease refcount on value. If refcount reaches zero, value is freed.
Definition: json.c:73
Convenient Signal Processing routines.
#define AST_STANDARD_APP_ARGS(args, parse)
Performs the 'standard' argument separation process for an application.
Stasis Message Bus API. See Stasis Message Bus API for detailed documentation.
Audiohooks Architecture.
static const int DEFAULT_SILENCE_THRESHOLD
The default silence threshold we will use if an alternate configured value is not present or is inval...
Definition: dsp.c:245
AST_JSON_INT_T ast_json_int_t
Primarily used to cast when packing to an "I" type.
Definition: json.h:87
int ast_audiohook_remove(struct ast_channel *chan, struct ast_audiohook *audiohook)
Remove an audiohook from a specified channel.
Definition: audiohook.c:721
Structure for a data store type.
Definition: datastore.h:31
int ast_audiohook_attach(struct ast_channel *chan, struct ast_audiohook *audiohook)
Attach audiohook to channel.
Definition: audiohook.c:484
struct timeval ast_tvnow(void)
Returns current timeval. Meant to replace calls to gettimeofday().
Definition: time.h:159
int64_t ast_tvdiff_ms(struct timeval end, struct timeval start)
Computes the difference (in milliseconds) between two struct timeval instances.
Definition: time.h:107
Structure for a data store object.
Definition: datastore.h:64
struct ast_datastore * ast_channel_datastore_find(struct ast_channel *chan, const struct ast_datastore_info *info, const char *uid)
Find a datastore on a channel.
Definition: channel.c:2399
struct stasis_message_type * ast_channel_talking_start(void)
Message type for a channel starting talking.
int ast_audiohook_destroy(struct ast_audiohook *audiohook)
Destroys an audiohook structure.
Definition: audiohook.c:124
int ast_custom_function_unregister(struct ast_custom_function *acf)
Unregister a custom function.
int ast_datastore_free(struct ast_datastore *datastore)
Free a data store object.
Definition: datastore.c:68
ast_audiohook_manipulate_callback manipulate_callback
Definition: audiohook.h:118
int ast_audiohook_init(struct ast_audiohook *audiohook, enum ast_audiohook_type type, const char *source, enum ast_audiohook_init_flags flags)
Initialize an audiohook structure.
Definition: audiohook.c:100
#define SCOPED_CHANNELLOCK(varname, chan)
scoped lock specialization for channels.
Definition: lock.h:619
General Asterisk PBX channel definitions.
Definition: dsp.c:407
#define ast_strdupa(s)
duplicate a string in memory from the stack
Definition: astmm.h:298
Data structure associated with a custom dialplan function.
Definition: pbx.h:118
#define ao2_ref(o, delta)
Reference/unreference an object and return the old refcount.
Definition: astobj2.h:459
Core PBX routines and definitions.
Private data structure used with the function's datastore.
void ast_dsp_set_threshold(struct ast_dsp *dsp, int threshold)
Set the minimum average magnitude threshold to determine talking by the DSP.
Definition: dsp.c:1788
struct stasis_topic * ast_channel_topic(struct ast_channel *chan)
A topic which publishes the events for a particular channel.
struct ast_audiohook audiohook
static const struct ast_datastore_info talk_detect_datastore
The channel datastore the function uses to store state.
void stasis_publish(struct stasis_topic *topic, struct stasis_message *message)
Publish a message to a topic's subscribers.
Definition: stasis.c:1511
static struct ast_custom_function talk_detect_function
Definition of the TALK_DETECT function.
#define ast_calloc(num, len)
A wrapper for calloc()
Definition: astmm.h:202
Module has failed to load, may be in an inconsistent state.
Definition: module.h:78
ast_audiohook_direction
Definition: audiohook.h:48
struct stasis_message * ast_channel_blob_create_from_cache(const char *uniqueid, struct stasis_message_type *type, struct ast_json *blob)
Create a ast_channel_blob message, pulling channel state from the cache.
int ast_dsp_silence(struct ast_dsp *dsp, struct ast_frame *f, int *totalsilence)
Process the audio frame for silence.
Definition: dsp.c:1488
void * data
Definition: datastore.h:66
unsigned int ast_format_get_sample_rate(const struct ast_format *format)
Get the sample rate of a media format.
Definition: format.c:379
Data structure associated with a single frame of data.
Abstract JSON element (object, array, string, int, ...).
struct ast_dsp * ast_dsp_new_with_rate(unsigned int sample_rate)
Allocates a new dsp with a specific internal sample rate used during processing.
Definition: dsp.c:1763
enum ast_audiohook_status status
Definition: audiohook.h:108
enum ast_frame_type frametype
#define ASTERISK_GPL_KEY
The text the key() function should return.
Definition: module.h:46
int ast_dsp_get_threshold_from_settings(enum threshold which)
Get silence threshold from dsp.conf.
Definition: dsp.c:2009
Asterisk module definitions.
struct timeval talking_start
int ast_channel_datastore_add(struct ast_channel *chan, struct ast_datastore *datastore)
Add a datastore to a channel.
Definition: channel.c:2385
#define AST_DECLARE_APP_ARGS(name, arglist)
Declare a structure to hold an application's arguments.
Application convenience functions, designed to give consistent look and feel to Asterisk apps...
int ast_channel_datastore_remove(struct ast_channel *chan, struct ast_datastore *datastore)
Remove a datastore from a channel.
Definition: channel.c:2394
#define ast_custom_function_register(acf)
Register a custom function.
Definition: pbx.h:1558
#define AST_APP_ARG(name)
Define an application argument.