• Open Menu Close Menu
  • Apple
  • Shopping Bag
  • Apple
  • Mac
  • iPad
  • iPhone
  • Watch
  • TV
  • Music
  • Support
  • Search apple.com
  • Shopping Bag

Lists

Open Menu Close Menu
  • Terms and Conditions
  • Lists hosted on this site
  • Email the Postmaster
  • Tips for posting to public mailing lists
Speech and AVAudioUnitGenerator callback not working properly?
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Speech and AVAudioUnitGenerator callback not working properly?


  • Subject: Speech and AVAudioUnitGenerator callback not working properly?
  • From: Waverly Edwards <email@hidden>
  • Date: Fri, 30 Jun 2017 20:33:50 +0000
  • Thread-topic: Speech and AVAudioUnitGenerator callback not working properly?

I am using AVAudioUnitGenerator to produce speech with
kAudioUnitSubType_SpeechSynthesis subtype.

Speech is generated however the callback for kSpeechSpeechDoneCallBack returns
before speech has been completed.



let speechOutput = RenderSpeech_Plain() // this is the expected behavior but
does not use AVAudioUnitGenerator

let speechRender = RenderSpeech_AudioUnit(usesAudioUnit: true) // true exhibits
abnormal behavior

let speechRender = RenderSpeech_AudioUnit(usesAudioUnit: false) // false
exhibits normal behavior but does NOT utilize the audio unit



Is there something that I am overlooking or is this a bug?

If this is a bug, is there a workaround?  The only workaround I can come up with is
not even close to sane, and it is not guaranteed to always work.



Thank you,





W.



import Foundation

import AVFoundation

import CoreAudio

import AudioToolbox





typealias SRefCon = UnsafeRawPointer



/// Builds the fixed set of demo sentences used to exercise the
/// speech-done callback; the callback walks through this list in order.
/// - Returns: Three test strings, spoken sequentially.
func getSpeechTextArray() -> [String]
{
    return [
        "It is a far, far better thing that I do, than I have ever done; it is a far, far better rest that I go to, than I have ever known.",
        "Oh yeah baby!  We are just getting started now!",
        "My mission in life is not merely to survive, but to thrive; and to do so with some passion, some compassion, some humor, and some style."
    ]
}



/// kSpeechSpeechDoneCallBack handler for the AVAudioUnitGenerator-backed
/// test class. Logs elapsed speaking time and, while test strings remain,
/// immediately queues the next one.
/// - Parameters:
///   - inSpeechChannel: Channel that finished speaking (unused here).
///   - inRefCon: Opaque pointer to the owning RenderSpeech_AudioUnit,
///     installed via kSpeechRefConProperty in the class initializer.
func speechDoneCallBackProc_AudioUnit(_ inSpeechChannel: SpeechChannel, _ inRefCon: SRefCon)
{
    // Recover the instance without transferring ownership.
    // Unmanaged.fromOpaque is the idiomatic counterpart to the
    // Unmanaged.passRetained(...).toOpaque() done at setup time, and is
    // safer than unsafeBitCast for round-tripping an object pointer.
    let classSelfRef = Unmanaged<RenderSpeech_AudioUnit>.fromOpaque(inRefCon).takeUnretainedValue()

    classSelfRef._endTime = CFAbsoluteTimeGetCurrent()
    classSelfRef._testCounter += 1

    print("We are done speaking string # \(classSelfRef._testCounter)")
    print("Elapsed time is \(classSelfRef._endTime - classSelfRef._startTime) seconds.")

    // Chain straight into the next test string, if any remain.
    if (classSelfRef._testCounter < classSelfRef._textArray.count)
    {
        classSelfRef.speakText(classSelfRef._textArray[classSelfRef._testCounter])
        // speakText already stamps _startTime; this overwrite is harmless.
        classSelfRef._startTime = CFAbsoluteTimeGetCurrent()
    }
}



/// kSpeechSpeechDoneCallBack handler for the plain (non-audio-unit) test
/// class. Logs elapsed speaking time and, while test strings remain,
/// immediately queues the next one.
/// - Parameters:
///   - inSpeechChannel: Channel that finished speaking (unused here).
///   - inRefCon: Opaque pointer to the owning RenderSpeech_Plain,
///     installed via kSpeechRefConProperty in the class initializer.
func speechDoneCallBackProc_Plain(_ inSpeechChannel: SpeechChannel, _ inRefCon: SRefCon)
{
    // Recover the instance without transferring ownership.
    // Unmanaged.fromOpaque is the idiomatic counterpart to the
    // Unmanaged.passRetained(...).toOpaque() done at setup time, and is
    // safer than unsafeBitCast for round-tripping an object pointer.
    let classSelfRef = Unmanaged<RenderSpeech_Plain>.fromOpaque(inRefCon).takeUnretainedValue()

    classSelfRef._endTime = CFAbsoluteTimeGetCurrent()
    classSelfRef._testCounter += 1

    print("We are done speaking string # \(classSelfRef._testCounter)")
    print("Elapsed time is \(classSelfRef._endTime - classSelfRef._startTime) seconds.")

    // Chain straight into the next test string, if any remain.
    if (classSelfRef._testCounter < classSelfRef._textArray.count)
    {
        classSelfRef.speakText(classSelfRef._textArray[classSelfRef._testCounter])
        // speakText already stamps _startTime; this overwrite is harmless.
        classSelfRef._startTime = CFAbsoluteTimeGetCurrent()
    }
}





/// Repro harness: speech synthesis routed through an AVAudioEngine graph
/// (SpeechSynthesis generator -> reverb -> main mixer -> output).
///
/// With usesAudioUnit == true, the kSpeechSpeechDoneCallBack fires EARLY,
/// before speech playback has completed — the abnormal behavior this test
/// demonstrates. With false, callback timing is normal but no audio data
/// flows through the graph.
class RenderSpeech_AudioUnit: NSObject
{
    var _engine       : AVAudioEngine!
    var _speechChan   : SpeechChannel? = nil   // channel SpeakCFString talks to
    var _speechGen    : AVAudioUnitGenerator!  // kAudioUnitSubType_SpeechSynthesis generator
    var _startTime    : CFAbsoluteTime = 0     // stamped when a string starts speaking
    var _endTime      : CFAbsoluteTime = 0     // stamped by the done callback
    var _textArray    = [String]()             // test strings spoken in sequence
    var _testCounter  = 0                      // count of strings finished so far

    /// Speaks one string on the configured channel and records the start time.
    func speakText(_ stringToSpeak: String)
    {
        SpeakCFString(_speechChan!, stringToSpeak as CFString, nil)
        _startTime = CFAbsoluteTimeGetCurrent()
    }

    /// Reads the SpeechChannel owned by the speech-synthesis audio unit so
    /// that spoken audio is rendered through the engine graph.
    /// - Returns: The unit's channel, or nil if the property read failed.
    func getSpeechChannel(_ speechUnit: AVAudioUnit) -> SpeechChannel?
    {
        var chan     : SpeechChannel? = nil
        let speechAU = speechUnit.audioUnit
        var propSize = UInt32(MemoryLayout<SpeechChannel>.size)
        let status   = AudioUnitGetProperty(speechAU, kAudioUnitProperty_SpeechChannel, kAudioUnitScope_Global, 0, &chan, &propSize)
        if (status != noErr) { print("kAudioUnitProperty_SpeechChannel status: \(status)") }

        return chan
    }

    /// Creates a stand-alone speech channel (default system voice) that is
    /// NOT routed through the audio unit graph.
    func getUnroutedSpeechChannel() -> SpeechChannel?
    {
        var chan : SpeechChannel? = nil
        let err = NewSpeechChannel(nil, &chan)  // creating channel with default system voice
        if (OSStatus(err) != noErr) { print("NewSpeechChannel err: \(err)") }

        return chan
    }

    /// - Parameter usesAudioUnit: true routes the speech channel through the
    ///   audio unit graph (done callback fires early — abnormal); false uses
    ///   a plain channel (normal callback timing, empty render buffers).
    init(usesAudioUnit: Bool)
    {
        super.init()

        _engine     = AVAudioEngine()
        _textArray  = getSpeechTextArray()

        // passRetained keeps self alive for the C callbacks (passUnretained
        // produced a BAD_EXEC later). NOTE(review): the retain is never
        // balanced, so this instance leaks — acceptable for a repro harness.
        var status  : OSStatus = noErr
        let selfRef = Unmanaged.passRetained(self).toOpaque()

        var speechCD = AudioComponentDescription()
        speechCD.componentType = kAudioUnitType_Generator
        speechCD.componentSubType = kAudioUnitSubType_SpeechSynthesis
        speechCD.componentManufacturer = kAudioUnitManufacturer_Apple
        speechCD.componentFlags = 0
        speechCD.componentFlagsMask = 0

        // FIX: the original first assigned a default-constructed
        // AVAudioUnitGenerator() to _speechGen and immediately discarded it
        // here; the redundant allocation has been removed.
        _speechGen = AVAudioUnitGenerator(audioComponentDescription: speechCD)

        let reverb = AVAudioUnitReverb()
        reverb.loadFactoryPreset(AVAudioUnitReverbPreset.cathedral)
        reverb.wetDryMix = 25

        _engine.attach(reverb)
        _engine.attach(_speechGen)

        // Wire generator -> reverb -> mixer -> output, all at the mixer's format.
        let busFormat = (_engine.mainMixerNode.outputFormat(forBus: 0))
        _engine.connect(_speechGen, to: reverb, format: busFormat)
        _engine.connect(reverb, to: _engine.mainMixerNode, format: busFormat)
        _engine.connect(_engine.mainMixerNode, to: _engine.outputNode, format: busFormat)

        reverb.bypass = false

        do{
            try _engine.start()
        } catch let error as NSError {
            print("Error engine.start():\(error)")
        }

        if ( usesAudioUnit == true )
        {
            // Speech channel routed through the audio unit: the done
            // callback returns EARLY (the abnormal behavior under test).
            _speechChan = getSpeechChannel( _speechGen as AVAudioUnit)
        } else {
            // Speech channel NOT routed through the audio unit: callback
            // timing is normal, but the render callback sees no data
            // because the graph is not used.
            _speechChan = getUnroutedSpeechChannel()
        }

        typealias DoneCallBackType = @convention(c) (SpeechChannel, SRefCon)->Void
        let callback: DoneCallBackType? = speechDoneCallBackProc_AudioUnit// can be a procedure or callback
        // SetSpeechProperty expects the callback's address boxed as a CFNumber.
        let callbackAddr = unsafeBitCast(callback, to: UInt.self) as CFNumber

        status = OSStatus(SetSpeechProperty(_speechChan!, kSpeechSpeechDoneCallBack, callbackAddr))
        if ( status != noErr) { print("SetSpeechProperty, kSpeechSpeechDoneCallBack status: \(status)") }

        // The refcon (also address-as-CFNumber) is handed back to the done
        // callback so it can recover this instance.
        let refCon = unsafeBitCast(selfRef, to: UInt.self) as CFNumber
        status = OSStatus(SetSpeechProperty( _speechChan!, kSpeechRefConProperty, refCon))
        if ( status != noErr) { print("SetSpeechProperty, kSpeechRefConProperty status: \(status)") }

        // Tap the output unit's render cycle to verify that audio data is
        // actually flowing through the graph.
        let renderCallback: AURenderCallback = { (
            inRefCon      : UnsafeMutableRawPointer,
            ioActionFlags : UnsafeMutablePointer<AudioUnitRenderActionFlags>,
            inTimeStamp   : UnsafePointer<AudioTimeStamp>,
            inBusNumber   : UInt32,
            inNumberFrames: UInt32,
            ioData        : UnsafeMutablePointer<AudioBufferList>?) -> OSStatus in

            // Skip the pre-render phase: its buffer is still empty and
            // would register as gaps in our capture.
            // if (ioActionFlags.pointee == AudioUnitRenderActionFlags(rawValue: 4) ) { return noErr }
            if (ioActionFlags.pointee == AudioUnitRenderActionFlags.unitRenderAction_PreRender ) { return noErr }

            let inputDataPtr  = UnsafeMutableAudioBufferListPointer(ioData)
            let mBuffers      : AudioBuffer = inputDataPtr![0]
            let bufferPointer = UnsafeMutableRawPointer(mBuffers.mData)
            var bufferIsEmpty = true // very simple test

            if let bptr = bufferPointer {
                let dataArray = bptr.assumingMemoryBound(to: Float.self)
                 for i in 0..<( Int(inNumberFrames)) {
                    // Testing to ensure data is going into the buffer; we
                    // expect it to stay empty when not routed through the
                    // audio unit.
                    if (dataArray[i] != 0) { bufferIsEmpty = false; break }
                }
            }

//            if (bufferIsEmpty == true ) { print("buffer is empty: \(bufferIsEmpty)") }
            return noErr
        }

        AudioUnitAddRenderNotify(_engine.outputNode.audioUnit!, renderCallback, selfRef)

        speakText( _textArray[0] )
    }
}







/// Baseline speech test that talks directly to the Speech Synthesis
/// Manager — no AVAudioEngine or audio unit involved.
/// The kSpeechSpeechDoneCallBack behaves normally for this class.
class RenderSpeech_Plain: NSObject
{
    var _endTime      : CFAbsoluteTime = 0     // stamped by the done callback
    var _startTime    : CFAbsoluteTime = 0     // stamped when a string starts speaking
    var _textArray    = [String]()             // test strings spoken in sequence
    var _speechChan   : SpeechChannel? = nil   // plain (unrouted) speech channel
    var _testCounter  = 0                      // count of strings finished so far

    /// Speaks one string and stamps the start time for elapsed-time logging.
    func speakText(_ stringToSpeak:String )
    {
        SpeakCFString(_speechChan!, stringToSpeak as CFString, nil)
        _startTime = CFAbsoluteTimeGetCurrent()
    }

    /// Creates a default-voice speech channel, installs the done callback
    /// and refcon, then starts speaking the first test string.
    override init()
    {
        super.init()

        // passRetained keeps self alive for the C callback; passUnretained
        // produced a BAD_EXEC in the debugger later.
        _textArray = getSpeechTextArray()
        let opaqueSelf = Unmanaged.passRetained(self).toOpaque()

        // Channel with the default system voice.
        let err = NewSpeechChannel(nil, &_speechChan)
        if OSStatus(err) != noErr { print("NewSpeechChannel err: \(err)") }

        // SetSpeechProperty expects the callback's address boxed as a CFNumber.
        typealias DoneCallBackType = @convention(c) (SpeechChannel, SRefCon) -> Void
        let doneProc: DoneCallBackType? = speechDoneCallBackProc_Plain
        let doneProcAddr = unsafeBitCast(doneProc, to: UInt.self) as CFNumber

        var status = OSStatus(SetSpeechProperty(_speechChan!, kSpeechSpeechDoneCallBack, doneProcAddr))
        if status != noErr { print("SetSpeechProperty, kSpeechSpeechDoneCallBack status: \(status)") }

        // The refcon lets the done callback recover this instance.
        let refCon = unsafeBitCast(opaqueSelf, to: UInt.self) as CFNumber
        status = OSStatus(SetSpeechProperty(_speechChan!, kSpeechRefConProperty, refCon))
        if status != noErr { print("SetSpeechProperty, kSpeechRefConProperty status: \(status)") }

        speakText(_textArray[0])
    }
}





// Entry point for the repro. Flip usesAudioUnit to toggle behavior:
// true routes speech through the audio unit graph (early/abnormal done
// callback); false uses a plain channel (normal callback timing).
// let speechOutput = RenderSpeech_Plain() // this is the expected behavior

let speechRender = RenderSpeech_AudioUnit(usesAudioUnit: true) // use boolean to toggle behavior, true is abnormal, false is normal


 _______________________________________________
Do not post admin requests to the list. They will be ignored.
Coreaudio-api mailing list      (email@hidden)
Help/Unsubscribe/Update your Subscription:

This email sent to email@hidden

  • Prev by Date: Setting rate on AudioUnit subtype kAudioUnitSubType_NewTimePitch
  • Previous by thread: Setting rate on AudioUnit subtype kAudioUnitSubType_NewTimePitch
  • Index(es):
    • Date
    • Thread