折腾:
期间,api接口试用成功后,需要把服务集成到Flask后台中。先去集成到本地Flask环境。
期间,明显还需要增加后台服务:用已有的Celery加上周期性任务(periodic task),每隔不到10分钟更新一次JWT的token。
参考:
The Speech Synthesis Markup Language – Microsoft Cognitive Services | Microsoft Docs
Speech service REST APIs | Microsoft Docs
Use Text to Speech using Speech services – Microsoft Cognitive Services | Microsoft Docs
【总结】
然后用代码:
<code>import requests ### TTS ### TTS_AUDIO_SUFFIX = ".mp3" # Use Text to Speech using Speech services - Microsoft Cognitive Services | Microsoft Docs # https://docs.microsoft.com/zh-cn/azure/cognitive-services/speech-service/how-to-text-to-speech MS_TTS_OUTPUT_FORMAT = "audio-16khz-128kbitrate-mono-mp3" # MS_TTS_OUTPUT_FORMAT = "riff-24khz-16bit-mono-pcm" # https://docs.microsoft.com/zh-cn/azure/cognitive-services/speech-service/supported-languages # MS_TTS_SPEAKER = "Microsoft Server Speech Text to Speech Voice (en-US, ZiraRUS)" # MS_TTS_SPEAKER = "Microsoft Server Speech Text to Speech Voice (en-US, JessaRUS)" MS_TTS_SPEAKER = "Microsoft Server Speech Text to Speech Voice (en-US, Jessa24kRUS)" MS_TTS_RATE = "-30.00%" MS_TTS_VOLUME = "+20.00%" # https://docs.microsoft.com/zh-cn/azure/cognitive-services/speech-service/how-to-text-to-speech MS_ERR_BAD_REQUEST = 400 MS_ERR_UNAUTHORIZED = 401 MS_ERR_REQUEST_ENTITY_TOO_LARGE = 413 def initAudioSynthesis(): """ init audio synthesis related: init token :return: """ createAudioTempFolder() # getBaiduToken() getMsToken() refreshMsTokenPeriodic() def getMsToken(): """get ms azure token""" refreshMsToken() def refreshMsToken(): """refresh microsoft azure token for later call tts api""" global app, log, gMsToken log.info("refreshMsToken: gMsToken=%s", gMsToken) getMsTokenUrl = app.config["MS_GET_TOKEN_URL"] reqHeaders = {"Ocp-Apim-Subscription-Key": app.config["MS_TTS_SECRET_KEY"]} log.info("getMsTokenUrl=%s, reqHeaders=%s", getMsTokenUrl, reqHeaders) resp = requests.post(getMsTokenUrl, headers=reqHeaders) log.info("resp=%s", resp) respTokenText = resp.text # eyxxxxiJ9.xxx.xxx log.info("respTokenText=%s", respTokenText) gMsToken = respTokenText # # for debug # gMsToken = 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJ1cm46bXMuY29nbml0aXZlc2VydmljZXMiLCJleHAiOiIxNTI3MDU5MTYxIiwicmVnaW9uIjoid2VzdHVzIiwic3Vic2NyaXB0aW9uLWlkIjoiOWQ0MmQ1N2I3YTQ1NDVjOThhZDE0ZDdjOWRhYWNjNjIiLCJwcm9kdWN0LWlkIjoiU3BlZWNoU2VydmljZXMuRnJlZSIsImNvZ25pdGl2ZS1zZXJ2aWNlcy1lbmRwb2ludCI6Imh0dHBzOi8vYXBpLmNvZ25pdGl2ZS5taWNyb3NvZnQuY29tL2ludGVybmFsL3YxLjAvIiwiYXp1cmUtcmVzb3VyY2UtaWQiOiIiLCJzY29wZSI6InNwZWVjaHNlcnZpY2VzIiwiYXVkIjoidXJuOm1zLnNwZWVjaHNlcnZpY2VzLndlc3R1cyJ9.B8_QoDtUtfsQs6OlKXG6p5SC4mm8s0nISdiUyr4Fnez" log.info("gMsToken=%s", gMsToken) def refreshMsTokenPeriodic(): """periodically refresh ms token""" # TODO: add celery periodic task to refresh ms token log.info("refreshMsTokenPeriodic") refreshMsToken() def msTTS(unicodeText): """call ms azure tts to generate audio(mp3/wav/...) from text""" global app, log, gMsToken log.info("msTTS: unicodeText=%s", unicodeText) isOk = False audioBinData = None errNo = 0 errMsg = "Unknown error" msTtsUrl = app.config["MS_TTS_URL"] log.info("msTtsUrl=%s", msTtsUrl) reqHeaders = { "Content-Type": "application/ssml+xml", "X-Microsoft-OutputFormat": MS_TTS_OUTPUT_FORMAT, "Ocp-Apim-Subscription-Key": app.config["MS_TTS_SECRET_KEY"], "Authorization": "Bear " + gMsToken } log.info("reqHeaders=%s", reqHeaders) # # for debug # MS_TTS_SPEAKER = "zhang san" ssmlDataStr = """ <speak version='1.0' xmlns="http://www.w3.org/2001/10/synthesis" xml:lang='en-US'> <voice name='%s'> <prosody rate='%s' volume='%s'> %s </prosody> </voice> </speak> """ % (MS_TTS_SPEAKER, MS_TTS_RATE, MS_TTS_VOLUME, unicodeText) log.info("ssmlDataStr=%s", ssmlDataStr) resp = requests.post(msTtsUrl, headers=reqHeaders, data=ssmlDataStr) log.info("resp=%s", resp) statusCode = resp.status_code log.info("statusCode=%s", statusCode) if statusCode == 200: # respContentType = resp.headers["Content-Type"] # 'audio/x-wav', 'audio/mpeg' # log.info("respContentType=%s", respContentType) # if re.match("audio/.*", respContentType): audioBinData = resp.content 
log.info("resp content is audio binary data, length=%d", len(audioBinData)) isOk = True errMsg = "" else: isOk = False errNo = resp.status_code errMsg = resp.reason log.error("resp errNo=%d, errMsg=%s", errNo, errMsg) # errNo=400, errMsg=Voice zhang san not supported # errNo=401, errMsg=Unauthorized # errNo=413, errMsg=Content length exceeded the allowed limit of 1024 characters. return isOk, audioBinData, errNo, errMsg def doAudioSynthesis(unicodeText): """ do audio synthesis from unicode text if failed for token invalid/expired, will refresh token to do one more retry """ global app, log, gCurBaiduRespDict isOk = False audioBinData = None errMsg = "" # # for debug # gCurBaiduRespDict["access_token"] = "99.569b3b5b470938a522ce60d2e2ea2506.2592000.1528015602.282335-11192483" log.info("doAudioSynthesis: unicodeText=%s", unicodeText) # isOk, audioBinData, errNo, errMsg = baiduText2Audio(unicodeText) isOk, audioBinData, errNo, errMsg = msTTS(unicodeText) log.info("isOk=%s, errNo=%d, errMsg=%s", isOk, errNo, errMsg) if isOk: errMsg = "" log.info("got synthesized audio binary data length=%d", len(audioBinData)) else: # if errNo == BAIDU_ERR_TOKEN_INVALID: if errNo == MS_ERR_UNAUTHORIZED: log.warning("Token invalid -> retry one for refresh token") # refreshBaiduToken() refreshMsToken() # isOk, audioBinData, errNo, errMsg = baiduText2Audio(unicodeText) isOk, audioBinData, errNo, errMsg = msTTS(unicodeText) log.info("after refresh token: isOk=%ss, errNo=%s, errMsg=%s", isOk, errNo, errMsg) else: log.warning("try synthesized audio occur error: errNo=%d, errMsg=%s", errNo, errMsg) audioBinData = None log.info("return isOk=%s, errMsg=%s", isOk, errMsg) if audioBinData: log.info("audio binary bytes=%d", len(audioBinData)) return isOk, audioBinData, errMsg def testAudioSynthesis(): global app, log, gTempAudioFolder # testInputUnicodeText = u"as a book-collector, i have the story you just want to listen!" testInputUnicodeText = u"but i have an funny story, as well. 
would you like to listen, very very funny?" # # for debug # testInputUnicodeText = u"but i have an funny story, as well. ttttttttttooooooooolllllllllllooooooooonnnnnnnnnnggggggg but i have an funny story, as well. would you like to listen, very very funny?" isOk, audioBinData, errMsg = doAudioSynthesis(testInputUnicodeText) if isOk: audioBinDataLen = len(audioBinData) log.info("Now will save audio binary data %d bytes to file", audioBinDataLen) # 1. save audio binary data into tmp file newUuid = generateUUID() log.info("newUuid=%s", newUuid) tempFilename = newUuid + TTS_AUDIO_SUFFIX log.info("tempFilename=%s", tempFilename) if not gTempAudioFolder: createAudioTempFolder() tempAudioFullname = os.path.join(gTempAudioFolder, tempFilename) #'/Users/crifan/dev/dev_root/company/naturling/projects/robotDemo/server/tmp/audio/2aba73d1-f8d0-4302-9dd3-d1dbfad44458.mp3' log.info("tempAudioFullname=%s", tempAudioFullname) with open(tempAudioFullname, 'wb') as tmpAudioFp: log.info("tmpAudioFp=%s", tmpAudioFp) tmpAudioFp.write(audioBinData) tmpAudioFp.close() log.info("Done to write audio data into file of %d bytes", audioBinDataLen) log.info("use celery to delay delete tmp file") else: log.warning("Fail to get synthesis audio for errMsg=%s", errMsg) initAudioSynthesis() testAudioSynthesis() </code>
config.py
<code># Audio Synthesis == TTS
# Subscription key; the TTS code reads it via app.config["MS_TTS_SECRET_KEY"]
# and sends it as the "Ocp-Apim-Subscription-Key" request header.
# NOTE(review): the value below looks like a redacted placeholder -- confirm
# and substitute a real key outside of version control.
MS_TTS_SECRET_KEY = "224xxxxx2"
# URL the TTS code POSTs to in order to obtain the access token
MS_GET_TOKEN_URL = "https://westus.api.cognitive.microsoft.com/sts/v1.0/issueToken"
# URL the TTS code POSTs the SSML request body to in order to get audio back
MS_TTS_URL = "https://westus.tts.speech.microsoft.com/cognitiveservices/v1"
</code>
运行后可以获得对应的token,并返回语音文件:
且调试时模拟出错,返回的也是预期的错误:
然后接着去:
新增Celery的周期性(periodic)task,去定期refresh Azure的token
【已解决】Flask中新增Celery周期任务去定期更新Azure的token
转载请注明:在路上 » 【已解决】把微软Azure语音合成TTS集成到Flask本地环境