Speech and AVAudioUnitGenerator callback not working properly?
Speech and AVAudioUnitGenerator callback not working properly?
- Subject: Speech and AVAudioUnitGenerator callback not working properly?
- From: Waverly Edwards <email@hidden>
- Date: Fri, 30 Jun 2017 20:33:50 +0000
- Thread-topic: Speech and AVAudioUnitGenerator callback not working properly?
I am using AVAudioUnitGenerator to produce speech with
kAudioUnitSubType_SpeechSynthesis subtype.
Speech is generated; however, the callback registered for kSpeechSpeechDoneCallBack
fires before speech has completed.
let speechOutput = RenderSpeech_Plain() // this is the expected behavior but does not use AVAudioUnitGenerator
let speechRender = RenderSpeech_AudioUnit(usesAudioUnit: true) // true exhibits abnormal behavior
let speechRender = RenderSpeech_AudioUnit(usesAudioUnit: false) // false exhibits normal behavior but does NOT utilize the audio unit
Is there something that I am overlooking or is this a bug?
If this is a bug, is there a workaround? The only thing I can come up with is
neither close to sane nor guaranteed to always work.
Thank you,
import Foundation
import AVFoundation
import CoreAudio
import AudioToolbox
/// Reference-constant pointer type used for the `inRefCon` parameter of the speech-done callbacks in this file.
typealias SRefCon = UnsafeRawPointer
/// Builds the list of test sentences that the speech renderers speak, one after another.
///
/// - Returns: The sample sentences in the order they should be spoken.
func getSpeechTextArray() -> [String] {
    return [
        "It is a far, far better thing that I do, than I have ever done; it is a far, far better rest that I go to, than I have ever known.",
        "Oh yeah baby! We are just getting started now!",
        "My mission in life is not merely to survive, but to thrive; and to do so with some passion, some compassion, some humor, and some style."
    ]
}
/// Speech-done callback for `RenderSpeech_AudioUnit`.
///
/// Records the end time, reports how long the utterance took, and starts the next
/// sentence if any remain.
///
/// - Parameters:
///   - inSpeechChannel: The channel that finished speaking (unused).
///   - inRefCon: Opaque pointer to the owning `RenderSpeech_AudioUnit`, as produced by
///     `Unmanaged.passRetained(_:).toOpaque()` in that class's initializer.
func speechDoneCallBackProc_AudioUnit(_ inSpeechChannel: SpeechChannel, _ inRefCon: SRefCon) {
    // Recover the instance without changing its retain count.
    let renderer = Unmanaged<RenderSpeech_AudioUnit>.fromOpaque(inRefCon).takeUnretainedValue()

    renderer._endTime = CFAbsoluteTimeGetCurrent()
    renderer._testCounter += 1

    print("We are done speaking string # \(renderer._testCounter)")
    print("Elapsed time is \(renderer._endTime - renderer._startTime)")

    // _testCounter now doubles as the index of the next sentence to speak.
    if renderer._testCounter < renderer._textArray.count {
        renderer._startTime = CFAbsoluteTimeGetCurrent()
        renderer.speakText(renderer._textArray[renderer._testCounter])
    }
}
/// Speech-done callback for `RenderSpeech_Plain`.
///
/// Records the end time, reports how long the utterance took, and starts the next
/// sentence if any remain.
///
/// - Parameters:
///   - inSpeechChannel: The channel that finished speaking (unused).
///   - inRefCon: Opaque pointer to the owning `RenderSpeech_Plain`, as produced by
///     `Unmanaged.passRetained(_:).toOpaque()` in that class's initializer.
func speechDoneCallBackProc_Plain(_ inSpeechChannel: SpeechChannel, _ inRefCon: SRefCon) {
    // Recover the instance without changing its retain count.
    let renderer = Unmanaged<RenderSpeech_Plain>.fromOpaque(inRefCon).takeUnretainedValue()

    renderer._endTime = CFAbsoluteTimeGetCurrent()
    renderer._testCounter += 1

    print("We are done speaking string # \(renderer._testCounter)")
    print("Elapsed time is \(renderer._endTime - renderer._startTime)")

    // _testCounter now doubles as the index of the next sentence to speak.
    if renderer._testCounter < renderer._textArray.count {
        renderer._startTime = CFAbsoluteTimeGetCurrent()
        renderer.speakText(renderer._textArray[renderer._testCounter])
    }
}
/// Speaks the test sentences through a speech channel that is either taken from an
/// `AVAudioUnitGenerator` speech-synthesis unit inside an `AVAudioEngine` graph, or
/// created directly with `NewSpeechChannel`.
///
/// This class exhibits ABNORMAL behavior for the kSpeechSpeechDoneCallBack callback
/// when the audio unit is used (usesAudioUnit == true).
class RenderSpeech_AudioUnit: NSObject {
    var _engine : AVAudioEngine!
    var _speechChan : SpeechChannel? = nil
    var _speechGen : AVAudioUnitGenerator!
    var _startTime : CFAbsoluteTime = 0
    var _endTime : CFAbsoluteTime = 0
    var _textArray = [String]()
    var _testCounter = 0

    /// Starts speaking `stringToSpeak` on the current channel and stamps `_startTime`.
    ///
    /// - Parameter stringToSpeak: The text to hand to `SpeakCFString`.
    func speakText(_ stringToSpeak:String ) {
        guard let channel = _speechChan else {
            print("speakText: no speech channel available")
            return
        }
        let err = SpeakCFString(channel, stringToSpeak as CFString, nil)
        if ( OSStatus(err) != noErr) { print("SpeakCFString err: \(err)") }
        _startTime = CFAbsoluteTimeGetCurrent()
    }

    /// Reads `kAudioUnitProperty_SpeechChannel` from the given unit.
    ///
    /// - Parameter speechUnit: The speech-synthesis audio unit node.
    /// - Returns: The unit's speech channel, or `nil` if the property read left it unset.
    func getSpeechChannel(_ speechUnit:AVAudioUnit) -> SpeechChannel? {
        var chan : SpeechChannel? = nil
        let speechAU = speechUnit.audioUnit
        var propSize = UInt32(MemoryLayout<SpeechChannel>.size)
        let status = AudioUnitGetProperty(speechAU,
                                          kAudioUnitProperty_SpeechChannel,
                                          kAudioUnitScope_Global,
                                          0,
                                          &chan,
                                          &propSize)
        if (status != noErr) { print("kAudioUnitProperty_SpeechChannel status: \(status)") }
        return chan
    }

    /// Creates a stand-alone speech channel.
    ///
    /// - Returns: A new channel, or `nil` if `NewSpeechChannel` left it unset.
    func getUnroutedSpeechChannel() -> SpeechChannel? {
        // This provides a speech channel but is NOT routed through audio unit
        var chan : SpeechChannel? = nil
        let err = NewSpeechChannel(nil, &chan) // creating channel with default system voice
        if ( OSStatus(err) != noErr) { print("NewSpeechChannel err: \(err)") }
        return chan
    }

    /// Builds the engine graph (speech generator -> reverb -> main mixer -> output),
    /// picks a speech channel, installs the done callback and a render notify, and
    /// starts speaking the first sentence.
    ///
    /// - Parameter usesAudioUnit: `true` to use the channel owned by the speech audio
    ///   unit; `false` to use a stand-alone channel that bypasses the graph.
    init(usesAudioUnit: Bool) {
        super.init()

        _engine = AVAudioEngine()
        _textArray = getSpeechTextArray()

        var status : OSStatus = noErr

        var speechCD = AudioComponentDescription()
        speechCD.componentType = kAudioUnitType_Generator
        speechCD.componentSubType = kAudioUnitSubType_SpeechSynthesis
        speechCD.componentManufacturer = kAudioUnitManufacturer_Apple
        speechCD.componentFlags = 0
        speechCD.componentFlagsMask = 0
        _speechGen = AVAudioUnitGenerator(audioComponentDescription: speechCD)

        let reverb = AVAudioUnitReverb()
        reverb.wetDryMix = 25

        // Nodes must belong to the engine before they can be connected.
        _engine.attach(_speechGen)
        _engine.attach(reverb)

        let busFormat = (_engine.mainMixerNode.outputFormat(forBus: 0))
        _engine.connect(_speechGen, to: reverb, format: busFormat)
        _engine.connect(reverb, to: _engine.mainMixerNode, format: busFormat)
        _engine.connect(_engine.mainMixerNode, to: _engine.outputNode, format: busFormat)
        reverb.bypass = false

        do {
            try _engine.start()
        } catch let error as NSError {
            print("Error engine.start():\(error)")
        }

        if ( usesAudioUnit == true ) {
            // speech channel routed through audio unit:
            // callback returns EARLY when routed through audio graph
            _speechChan = getSpeechChannel( _speechGen as AVAudioUnit)
        } else {
            // speech channel NOT routed through audio unit, callback works
            // however, we cant obtain any data from render callback because
            // the graph is not used
            _speechChan = getUnroutedSpeechChannel()
        }

        guard let channel = _speechChan else {
            print("RenderSpeech_AudioUnit: no speech channel available")
            return
        }

        typealias DoneCallBackType = @convention(c) (SpeechChannel, SRefCon) -> Void
        let callback: DoneCallBackType? = speechDoneCallBackProc_AudioUnit // can be a procedure or callback
        let callbackAddr = unsafeBitCast(callback, to: UInt.self) as CFNumber
        status = OSStatus(SetSpeechProperty(channel, kSpeechSpeechDoneCallBack, callbackAddr))
        if ( status != noErr) { print("SetSpeechProperty, kSpeechSpeechDoneCallBack status: \(status)") }

        // passing passUnretained will produce a BAD_EXEC in debugger later.
        // The retain taken here is never balanced in this sample.
        let selfRef = Unmanaged.passRetained(self).toOpaque()
        let refCon = UInt(bitPattern: selfRef) as CFNumber
        status = OSStatus(SetSpeechProperty(channel, kSpeechRefConProperty, refCon))
        if ( status != noErr) { print("SetSpeechProperty, kSpeechRefConProperty status: \(status)") }

        let renderCallback: AURenderCallback = { (
            inRefCon : UnsafeMutableRawPointer,
            ioActionFlags : UnsafeMutablePointer<AudioUnitRenderActionFlags>,
            inTimeStamp : UnsafePointer<AudioTimeStamp>,
            inBusNumber : UInt32,
            inNumberFrames: UInt32,
            ioData : UnsafeMutablePointer<AudioBufferList>?) -> OSStatus in

            // The pre-render buffer has been empty, causing us to have gaps in our capture
            // if (ioActionFlags.pointee == AudioUnitRenderActionFlags(rawValue: 4) ) { return noErr }
            if (ioActionFlags.pointee == AudioUnitRenderActionFlags.unitRenderAction_PreRender ) { return noErr }

            // Nothing to inspect when no buffer list (or an empty one) is supplied.
            guard let bufferList = UnsafeMutableAudioBufferListPointer(ioData),
                  let firstBuffer = bufferList.first else { return noErr }

            var bufferIsEmpty = true // very simple test
            if let bptr = firstBuffer.mData {
                // NOTE(review): assumes the first buffer holds Float samples — confirm against busFormat.
                let dataArray = bptr.assumingMemoryBound(to: Float.self)
                for i in 0..<( Int(inNumberFrames)) {
                    // testing to ensure data is going into buffer
                    // we expect this to be empty if we are not routing through the audio unit
                    if (dataArray[i] != 0) { bufferIsEmpty = false; break }
                }
            }
            // if (bufferIsEmpty == true ) { print("buffer is empty: \(bufferIsEmpty)") }
            return noErr
        }

        if let outputAU = _engine.outputNode.audioUnit {
            // The callback never reads inRefCon, so no user data is passed.
            status = AudioUnitAddRenderNotify(outputAU, renderCallback, nil)
            if ( status != noErr) { print("AudioUnitAddRenderNotify status: \(status)") }
        } else {
            print("RenderSpeech_AudioUnit: output node has no audio unit")
        }

        if let firstText = _textArray.first {
            speakText( firstText )
        }
    }
}
/// Speaks the test sentences through a stand-alone speech channel created with
/// `NewSpeechChannel`, without any audio unit or engine graph.
///
/// This class exhibits normal behavior for kSpeechSpeechDoneCallBack.
class RenderSpeech_Plain: NSObject {
    var _endTime : CFAbsoluteTime = 0
    var _startTime : CFAbsoluteTime = 0
    var _textArray = [String]()
    var _speechChan : SpeechChannel? = nil
    var _testCounter = 0

    /// Starts speaking `stringToSpeak` on the current channel and stamps `_startTime`.
    ///
    /// - Parameter stringToSpeak: The text to hand to `SpeakCFString`.
    func speakText(_ stringToSpeak:String ) {
        guard let channel = _speechChan else {
            print("speakText: no speech channel available")
            return
        }
        let err = SpeakCFString(channel, stringToSpeak as CFString, nil)
        if ( OSStatus(err) != noErr) { print("SpeakCFString err: \(err)") }
        _startTime = CFAbsoluteTimeGetCurrent()
    }

    /// Creates the speech channel, installs the done callback and its reference
    /// constant, and starts speaking the first sentence.
    override init() {
        super.init()

        var status : OSStatus = noErr
        _textArray = getSpeechTextArray()

        let err = NewSpeechChannel(nil, &_speechChan) // creating channel with default system voice
        if ( OSStatus(err) != noErr) { print("NewSpeechChannel err: \(err)") }

        guard let channel = _speechChan else {
            print("RenderSpeech_Plain: no speech channel available")
            return
        }

        typealias DoneCallBackType = @convention(c) (SpeechChannel, SRefCon) -> Void
        let callback: DoneCallBackType? = speechDoneCallBackProc_Plain
        let callbackAddr = unsafeBitCast(callback, to: UInt.self) as CFNumber
        status = OSStatus(SetSpeechProperty(channel, kSpeechSpeechDoneCallBack, callbackAddr))
        if ( status != noErr) { print("SetSpeechProperty, kSpeechSpeechDoneCallBack status: \(status)") }

        // passing passUnretained will produce a BAD_EXEC in debugger later.
        // The retain taken here is never balanced in this sample.
        let selfRef = Unmanaged.passRetained(self).toOpaque()
        let refCon = UInt(bitPattern: selfRef) as CFNumber
        status = OSStatus(SetSpeechProperty(channel, kSpeechRefConProperty, refCon))
        if ( status != noErr) { print("SetSpeechProperty, kSpeechRefConProperty status: \(status)") }

        if let firstText = _textArray.first {
            speakText( firstText )
        }
    }
}
// let speechOutput = RenderSpeech_Plain() // this is the expected behavior
// Use the boolean to toggle behavior: true is abnormal, false is normal.
let speechRender = RenderSpeech_AudioUnit(usesAudioUnit: true)
Do not post admin requests to the list. They will be ignored.
Coreaudio-api mailing list (email@hidden)
Help/Unsubscribe/Update your Subscription:
This email sent to email@hidden