audio/utilities/AudioDetect.cc

	AudioDetect.cc revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1993-2001 by Sun Microsystems, Inc.
 * All rights reserved.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <malloc.h>
#include <stdlib.h>
#include <memory.h>
#include <AudioBuffer.h>
#include <AudioLib.h>
#include <AudioDetect.h>
#include <silence_detect.h>

// XXX - temporary: manual data conversion
#include <AudioTypePcm.h>

// class AudioDetectArray methods

// Allocation increment for array: the number of entries by which the
// detection-point array grows when it fills up.  Also used as the
// default starting size when the constructor is given a size of zero.
static const unsigned int	ARRAY_INCR = 50;

// Minimum time for detection algorithm is 250 milliseconds.
// Analyze() never pads the analysis region by less than this amount.
static const double		MIN_DURATION = .250;
// The detection algorithm needs at least 20 msecs more than silence duration.
// This is added to the larger of the minimum silence/sound durations
// to get the padding applied around the region being analyzed.
static const double		DURATION_INCR = .022;
// Minimum silence for detection algorithm is fine grain
// XXX - not used for now
static const double		MIN_SILENCE = .05;

// Constructor
// Allocates storage for 'cnt' detection points (ARRAY_INCR if cnt is zero).
// The array starts out holding no valid points.
AudioDetectArray::
AudioDetectArray(
	unsigned int	cnt):		// start size of array
	count(0)
{
	// A zero request selects the default size
	size = (cnt != 0) ? cnt : ARRAY_INCR;
	pts = (AudioDetectPts*)malloc(size * sizeof (*pts));

	// If the allocation failed, record that there is no capacity.
	// appendpts() grows the array when count == size, so the next
	// append retries the allocation (and reports the failure)
	// instead of storing through a NULL pointer.
	if (pts == NULL)
		size = 0;
}

// Destructor
// Releases the storage holding the detection points, if any.
AudioDetectArray::
~AudioDetectArray()
{
	// Nothing was allocated, so there is nothing to release
	if (pts == NULL)
		return;

	free(pts);
}

// Append a list of detection points to the array.
// If 'cnt' is (unsigned int)-1, the list is scanned for its DETECT_EOF
// entry and everything up to and including that entry is appended.
// Returns AUDIO_SUCCESS, or AUDIO_UNIXERROR if the array could not grow.
// On failure the points appended so far are kept and the array
// remains valid.
AudioError AudioDetectArray::
appendpts(
	AudioDetectPts*	newpts,		// new array to append
	unsigned int	cnt)		// number of points to append
{
	// If cnt == -1, append until eof
	if (cnt == (unsigned int)-1) {
		AudioDetectPts*	cp;

		// Start at one so that the eof entry itself is included
		cnt = 1;
		for (cp = newpts; cp->type != DETECT_EOF; cp++)
			cnt++;
	}

	// Loop through, appending each new point in turn
	while (cnt-- > 0) {
		if (count == size) {
			// Time to allocate more space in array.
			// Commit the new pointer and size only on success:
			// realloc() leaves the old block untouched when it
			// fails, so overwriting 'pts' directly would leak it.
			unsigned int	newsize = size + ARRAY_INCR;
			AudioDetectPts*	grown;

			grown = (AudioDetectPts*)
			    realloc((char *)pts, (newsize * sizeof (*pts)));
			if (grown == NULL)
				return (AUDIO_UNIXERROR);
			pts = grown;
			size = newsize;
		}
		pts[count++] = *newpts++;
	}
	return (AUDIO_SUCCESS);
}

// Process the detection array, eliminating eofs and collapsing adjacent entries.
// On return:
//   - an empty or all-eof array holds exactly one eof entry
//   - otherwise, eofs other than the final entry are removed and each run
//     of adjacent entries of the same type is reduced to its first entry
// The array is rewritten in place and 'count' is updated to match.
void AudioDetectArray::
reduce()
{
	AudioDetectPts*		ip;	// input pointer
	AudioDetectPts*		op;	// output pointer
	unsigned int		ocnt;	// output counter

	// Start input and output pointers together
	ip = pts;
	op = pts;
	ocnt = 0;

	// If no entries in the array, make at least one
	if (count == 0) {
		op->pos = 0.;
		op->type = DETECT_EOF;
		count = 1;
		return;
	}

	// Get a legitimate starting point.
	// The remaining count is tested before the entry is examined, so
	// an array made up entirely of eofs is never read past its last
	// valid entry and the count cannot be decremented below zero.
	while ((count > 0) && (ip->type == DETECT_EOF)) {
		*op = *ip++;	// copy eof, in case it's the last
		count--;
	}
	if (count == 0) {
		count = 1;	// already copied final eof
		return;
	}

	// Copy first entry
	*op++ = *ip++;
	ocnt++;
	count--;

	// Collapse the array.
	// Inside the loop, 'count' is the number of entries that follow
	// the one being examined.
	while (count > 0) {
		count--;
		if (ip->type == op[-1].type) {
			// Eliminate adjacent entries of same type
			ip++;
		} else if ((ip->type == DETECT_EOF) && (count > 0)) {
			// Remove non-final eofs
			ip++;
		} else {
			*op++ = *ip++;
			ocnt++;
		}
	}
	count = ocnt;		// set final count
}

// Duplicate the valid portion of the detection array.
// 'cp' is set to newly malloc'ed storage holding a copy of the first
// 'count' entries (or to NULL if the allocation fails).
// Returns AUDIO_SUCCESS, or AUDIO_UNIXERROR on allocation failure.
AudioError AudioDetectArray::
duparray(
	AudioDetectPts*&	cp)	// set pointer to new array
{
	// Only the valid entries are duplicated, not the spare capacity
	const size_t	nbytes = count * sizeof (*pts);

	cp = (AudioDetectPts*)malloc(nbytes);
	if (cp != NULL) {
		(void) memcpy(cp, pts, nbytes);
		return (AUDIO_SUCCESS);
	}
	return (AUDIO_UNIXERROR);
}


// class AudioDetect methods


// Constructor
// Sets the default minimum sound/silence durations, creates the silence
// detection state, and picks up the remaining parameter defaults from
// that state.
AudioDetect::
AudioDetect()
{
	SIL_STATE*	sp;		// typed view of the detection state

	min_silence = .2;
	min_sound = .3;

	// The state is created for an 8000 sample rate; analyzeappend()
	// sets the real rate before any data is processed.
	state = (void*)silence_create_state(8000, min_silence);
	sp = (SIL_STATE*)state;

	thresh_scale = silence_get_thr_scale(sp);
	noise_ratio = silence_get_noise_ratio(sp);
}

// Destructor
// Tears down the silence detection state created by the constructor.
AudioDetect::
~AudioDetect()
{
	SIL_STATE*	sp = (SIL_STATE*)state;

	silence_destroy_state(sp);
}


// Get parameters for the audio detection algorithm.
// Each call retrieves one parameter, selected by the 'type' argument.
// The current setting is stored into 'val'.
// Returns AUDIO_SUCCESS, or AUDIO_ERR_BADARG (leaving 'val' untouched)
// if 'type' is not a recognized parameter.
AudioError AudioDetect::
GetParam(
	AudioDetectConfig type,		// type flag
	Double&		val)		// address of value
{
	switch (type) {
	case DETECT_NOISE_RATIO:
		val = noise_ratio;
		return (AUDIO_SUCCESS);
	case DETECT_THRESHOLD_SCALE:
		val = thresh_scale;
		return (AUDIO_SUCCESS);
	case DETECT_MINIMUM_SOUND:
		val = min_sound;
		return (AUDIO_SUCCESS);
	case DETECT_MINIMUM_SILENCE:
		val = min_silence;
		return (AUDIO_SUCCESS);
	default:
		break;
	}

	// Unknown parameter
	return (AUDIO_ERR_BADARG);
}

// Set parameters for the audio detection algorithm.
// Each call adjusts one parameter, selected by the 'type' argument.
// 'val' is the new setting.  Negative values are rejected for every
// parameter, and the noise ratio may not exceed 1.
// Returns AUDIO_SUCCESS, or AUDIO_ERR_BADARG if the value is out of range
// or 'type' is not a recognized parameter.
AudioError AudioDetect::
SetParam(
	AudioDetectConfig type,		// type flag
	Double		val)		// value
{
	// No parameter accepts a negative value
	if (val < 0.)
		return (AUDIO_ERR_BADARG);

	switch (type) {
	case DETECT_NOISE_RATIO:
		// The noise ratio has an upper bound as well
		if (val > 1.)
			return (AUDIO_ERR_BADARG);
		noise_ratio = val;
		return (AUDIO_SUCCESS);
	case DETECT_THRESHOLD_SCALE:
		thresh_scale = val;
		return (AUDIO_SUCCESS);
	case DETECT_MINIMUM_SOUND:
		min_sound = val;
		return (AUDIO_SUCCESS);
	case DETECT_MINIMUM_SILENCE:
		min_silence = val;
		return (AUDIO_SUCCESS);
	default:
		break;
	}

	// Unknown parameter
	return (AUDIO_ERR_BADARG);
}

// Entry stubs for invocations with missing arguments.
// With no range given, analysis starts at offset 0 and the ending
// offset is left unknown (AUDIO_UNKNOWN_TIME).
AudioError AudioDetect::
Analyze(
	AudioDetectPts*& pts,		// value array to modify
	Audio*		obj)		// AudioList, or whatever
{
	Double		begin;		// default starting offset
	Double		end;		// default ending offset

	begin = 0.;
	end = AUDIO_UNKNOWN_TIME;
	return (Analyze(pts, obj, begin, end));
}

// Entry stub for invocations that give only a starting offset.
// The ending offset is left unknown (AUDIO_UNKNOWN_TIME).
AudioError AudioDetect::
Analyze(
	AudioDetectPts*& pts,		// value array to modify
	Audio*		obj,		// AudioList, or whatever
	Double		from)		// starting offset
{
	Double		end;		// default ending offset

	end = AUDIO_UNKNOWN_TIME;
	return (Analyze(pts, obj, from, end));
}

// Process data from a given Audio object, filling in the 'pts' array.
// If from and to identify a subset region for which there are
// already valid markers in the 'pts' array, the array is updated.
// For instance, suppose a 60 second file has already been mapped out.
// Now a PASTE operation inserts 10 seconds right in the middle.
// Step through the pts array, adding 10 (the insert length) to
// every time greater than 30 (the insert point).  Then call this routine
// with (from, to) set to (30, 40).  The pts array will be
// updated by reading the minimum required amount of data (which will,
// however, be a little longer than 10 seconds in order to get the
// transitions right).
// Returns audio error code or AUDIO_SUCCESS.
// This routine deallocates the input copy of 'pts', so make sure
// it is a copy of non-volatile storage, if necessary.
// On success 'pts' is set to a newly malloc'ed array, terminated by a
// DETECT_EOF entry, which the caller must free.  If an error occurs
// before the result is copied out, 'pts' is left NULL.
AudioError AudioDetect::
Analyze(
	AudioDetectPts*& pts,		// value array to modify
	Audio*		obj,		// AudioList, or whatever
	Double		from,		// starting offset [0.]
	Double		to)		// ending offset [AUDIO_UNKNOWN_TIME]
{
	Double		maxdur;		// length of minimum sample interval
	AudioDetectArray* ap;		// new value array
	AudioDetectPts*	oldpts;		// saved input array
	AudioDetectPts*	list;
	AudioDetectPts*	aptr;
	unsigned int	cnt;
	AudioError	err;

	ap = new AudioDetectArray;
	if (ap == NULL)
		return (AUDIO_UNIXERROR);
	oldpts = pts;
	pts = NULL;

	// Get largest of the minimum time parameters
	maxdur = min_silence;
	if (min_sound > maxdur)
		maxdur = min_sound;
	maxdur += DURATION_INCR;
	if (maxdur < MIN_DURATION)
		maxdur =  MIN_DURATION;

	// Adjust starting time
	from -= maxdur;
	if (from < 0.)
		from = 0.;

	// Adjust ending time
	if (!Undefined(to))
		to += maxdur;

	// If replacing virtually the whole array, skip trying to update
	if ((oldpts != NULL) &&
	    ((oldpts->type == DETECT_EOF) ||
	    ((from < MIN_DURATION) && (Undefined(to))))) {
		(void) free((char *)oldpts);
		oldpts = NULL;
		from = 0.;
	}

	// If time is a subset of an existing list, copy out the first entries
	list = oldpts;
	if ((list != NULL) && (from > 0.)) {
		cnt = 0;
		// The list is terminated by a DETECT_EOF entry; testing
		// against any other value would walk off its end.
		while ((list->type != DETECT_EOF) && (from > list->pos)) {
			cnt++;
			list++;
		}
		if ((err = ap->appendpts(oldpts, cnt)))
			goto error_ret;
	}

	// Analyze the specified region of data
	if ((err = analyzeappend(ap, obj, from, to, maxdur)))
		goto error_ret;

	// If time is a subset of an existing list, copy out the last entries
	if ((list != NULL) && (!Undefined(to))) {
		// Step over every remaining old entry, keeping only those
		// at or beyond the end of the re-analyzed region.  The
		// pointer advances whether or not the entry is kept.
		for (; list->type != DETECT_EOF; list++) {
			if (list->pos >= to) {
				if ((err = ap->appendpts(list, 1)))
					goto error_ret;
			}
		}

		// Carry over the old terminating eof as well, so that the
		// merged array still ends in an eof after the copied
		// entries.  The loops below depend on that terminator.
		if ((err = ap->appendpts(list, 1)))
			goto error_ret;
	}

	// Compress and copy the list
	ap->reduce();


	// Eliminate silence segments that are under their length threshold
	for (aptr = ap->pts; aptr->type != DETECT_EOF; aptr++) {
		if ((aptr->type == DETECT_SILENCE) &&
		    ((aptr[1].pos - aptr->pos) < min_silence))
			aptr->type = DETECT_SOUND;
	}
	ap->reduce();

	// Eliminate sound segments that are under their length threshold
	for (aptr = ap->pts; aptr->type != DETECT_EOF; aptr++) {
		if ((aptr->type == DETECT_SOUND) &&
		    ((aptr[1].pos - aptr->pos) < min_sound))
			aptr->type = DETECT_SILENCE;
	}
	ap->reduce();

	// Caller must free the returned array
	// XXX - maybe can arrange for caller to dup it instead
	err = ap->duparray(pts);

error_ret:
	// Throw away interim structures
	if (oldpts != NULL)
		(void) free((char *)oldpts);
	delete ap;
	return (err);
}

// Audio detection main analyze loop.
// Reads the region [from, to) of 'obj' one buffer at a time, runs each
// buffer through the silence detector, and appends the resulting
// sound/silence transition points to 'aptr'.  The appended points start
// with a DETECT_SOUND entry at 'from' and, unless an error occurs, end
// with a DETECT_EOF entry at the last position reached.
// If 'to' is undefined, reading continues until the end of the object.
// Returns AUDIO_SUCCESS or an audio error code.
AudioError AudioDetect::
analyzeappend(
	AudioDetectArray*& aptr,		// value array to modify
	Audio*		obj,			// AudioList, or whatever
	Double		from,			// starting offset
	Double		to,			// ending offset
	Double		mintime)		// minimum analysis buffer size
{
	AudioBuffer*	buf;
	AudioBuffer*	abp;
	AudioHdr	hdr;
	Double		start;
	Double		len;
	Double		minbuf;
	Double		tmpend = 0.;
	unsigned int	bufsiz;
	off_t		offset;
	unsigned int	npts;
	unsigned int	n;
	int		i;
	int		valid;
	END_POINTS*	ep;
	AudioDetectPts	apt;
	AudioError	err;

	buf = NULL;
	abp = NULL;
	offset = 0;
	start = from;
	minbuf = mintime * 10.;			// Process buffer length

	// Start out assuming non-silence
	apt.type = DETECT_SOUND;
	apt.pos = from;
	if ((err = aptr->appendpts(&apt, 1)))
		return (err);

	// If eof, set an eof marker
	if (!Undefined(obj->GetLength()) && (from >= obj->GetLength())) {
		goto no_data;
	}

	hdr = obj->GetDHeader(from);
	// Zero never matches a real rate, so the first pass through the
	// loop below always allocates the buffer and sets the real rate.
	hdr.sample_rate = 0;		// dummy value for now

	// Init detection state
	silence_set_min_sil_dur(min_silence, (SIL_STATE*)state);
	silence_set_thr_scale(thresh_scale, (SIL_STATE*)state);
	silence_set_noise_ratio(noise_ratio, (SIL_STATE*)state);
	silence_init_state((SIL_STATE*)state);

	// Loop while there is data to read
	do {
		// If eof, we're done
		if (!Undefined(obj->GetLength()) &&
		    (from >= obj->GetLength())) {
			break;
		}

		// If the requested region has been fully consumed, we're
		// done.  Without this check a finite 'to' would lead to a
		// zero or negative copy length below, with nothing left
		// to end the loop.
		if (!Undefined(to) && (from >= to)) {
			break;
		}

		// If the current sample rate does not match the old, re-init
		if (obj->GetDHeader(from).sample_rate != hdr.sample_rate) {
			hdr.sample_rate = obj->GetDHeader(from).sample_rate;
			if (buf != NULL)
				buf->Dereference();

			// Allocate a new holding buffer
			buf = new AudioBuffer(minbuf);
			if (buf == NULL)
				return (AUDIO_UNIXERROR);
			buf->Reference();
			if ((err = buf->SetHeader(hdr)))
				goto error_ret;

			// Update algorithm state
			silence_set_rate(hdr.sample_rate, (SIL_STATE*)state);
		}

		// Limit the copy to the buffer length or remaining data time
		len = minbuf;
		if (!Undefined(to)) {
			if (len > (to - from)) {
				len = to - from;
			}
		}

		// Copy one region of data
		// XXX - Duplicate bufptr to hold onto it (for now)
		//    tmpend should = 0. from initialization area

		// tmpend should be cleared each time before calling.
		// Bug ID 4034048	DPT 24-Feb-97
		// NOTE(review): the code below relies on this call advancing
		// 'from' and updating 'len' - confirm against AudioLib.
		tmpend = 0.;
		err = AudioAsyncCopy(obj, buf, from, tmpend, len);

		// XXX - Temporary: Convert to linear manually
		if (!err) {
			AudioTypePcm	conv;		// XXX - temporary
			AudioHdr	newhdr = hdr;

			abp = buf;
			// only convert if not LINEAR pcm
			if (buf->GetHeader().encoding != LINEAR) {
				newhdr.bytes_per_unit = 2;
				newhdr.encoding = LINEAR;
				err = conv.Convert(abp, newhdr);
				if (!err)
					abp->Reference();
			} else {
				abp->Reference();
			}
		}

		if (!err) {
			// Process data through signal detection routine
			bufsiz = (unsigned int) hdr.Time_to_Samples(len);
			npts = bufsiz;
			i = silence_detect((short *)abp->GetAddress(), &ep,
			    &npts, &valid, (SIL_STATE*)state);
			abp->Dereference();		// XXX - temporary
			if (i == SILENCE_ERR_BUFFER_TOO_SMALL) {
				// If buffer too small, go on
				err = AUDIO_SUCCESS;
			} else if (i == SILENCE_ERR_REALLOC_FAILED) {
				err = AUDIO_UNIXERROR;
			}

			// Convert endpoints to AudioDetectPts
			if (!err) {
				// First, add byte offset to all 'times'
				// Then, create entries for silence regions
				// (the index is unsigned to match npts)
				for (n = 0; n < npts; n++) {
					ep[n].ep_start += (int)offset;
					apt.pos = start +
					    hdr.Samples_to_Time(ep[n].ep_start);
					apt.type = DETECT_SILENCE;
					if ((err = aptr->appendpts(&apt, 1)))
						goto error_ret;

					// If the end of silence is the same
					// as the end of processed data,
					// don't start sound yet
					if (ep[n].ep_end == valid)
						break;
					ep[n].ep_end += (int)offset;
					apt.pos = start +
					    hdr.Samples_to_Time(ep[n].ep_end);
					apt.type = DETECT_SOUND;
					if ((err = aptr->appendpts(&apt, 1)))
						goto error_ret;
				}
				offset += bufsiz;
			}
		}
	} while (!err);

no_data:
	// Set end-of-file marker to latest position
	if (!err || (err == AUDIO_EOF)) {
		apt.type = DETECT_EOF;
		apt.pos = from;
		err = aptr->appendpts(&apt, 1);
	}

error_ret:
	if (buf != NULL)
		buf->Dereference();
	return (err);
}