折腾:
期间,即使要去调研,哪家ASR好,也需要先去弄个录音文件才行。
所以还是先去搞清楚web端,html+js中,如何调用麦克风,获取用户说话的语音输入
然后保存起来存成一个文件,也许是临时文件,用于后续测试。
考虑到:
https://ai.baidu.com/docs#/ASR-API/top
支持pcm,wav,amr格式,所以此处保存尽量考虑保存为pcm格式(是不是更加通用,文件大小也比wav小多了?)
【未解决】Chrome中尝试使用input audio file方式去实现获取麦克风录音
再去试试:navigator.getUserMedia()
此处Chrome版本是:版本 66.0.3359.139(正式版本) (64 位)
然后:
中,没有找到:
Enable Media Stream
只有:
media
估计是已经默认支持了?
代码:
$(document).ready(function(){
/**
 * Report whether the current browser exposes any getUserMedia entry point.
 *
 * Checks the promise-based navigator.mediaDevices.getUserMedia first, then
 * the legacy callback-based navigator.getUserMedia and its vendor-prefixed
 * variants.
 *
 * @returns {boolean} true when at least one getUserMedia variant is present.
 */
function hasGetUserMedia() {
  // Note: Opera builds are unprefixed.
  return !!((navigator.mediaDevices && navigator.mediaDevices.getUserMedia) ||
    navigator.getUserMedia || navigator.webkitGetUserMedia ||
    navigator.mozGetUserMedia || navigator.msGetUserMedia);
}
/**
 * Log the navigator object, basic browser identification, and whether
 * getUserMedia is available according to hasGetUserMedia().
 *
 * Produces console output only; nothing is returned.
 */
function detectGetUserMedia(){
  console.log(navigator);
  console.log("Current browser: vendor=%s, platform=%s, userAgent=%s",
    navigator.vendor, navigator.platform, navigator.userAgent);
  if (hasGetUserMedia()) {
    // Good to go!
    console.log("current browser support getUserMedia -> allow user speak use microspeaker");
  } else {
    // Plain ASCII quotes: typographic quotes are not valid string delimiters in JavaScript.
    console.error('getUserMedia() is not supported in current browser');
  }
}
// Log whether this browser exposes getUserMedia.
detectGetUserMedia();
效果:
Navigator {vendorSub: "", productSub: "20030107", vendor: "Google Inc.", maxTouchPoints: 0, hardwareConcurrency: 4, …}appCodeName: "Mozilla"appName: "Netscape"appVersion: "5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36"bluetooth: Bluetooth {}budget: BudgetService {}clipboard: Clipboard {}connection: NetworkInformation {downlink: 6.1, effectiveType: "3g", onchange: null, rtt: 650, saveData: false}cookieEnabled: truecredentials: CredentialsContainer {}deviceMemory: 8doNotTrack: nullgeolocation: Geolocation {}hardwareConcurrency: 4language: "zh-CN"languages: (3) ["zh-CN", "zh", "en"]maxTouchPoints: 0mediaCapabilities: MediaCapabilities {}__proto__: MediaCapabilitiesmediaDevices: MediaDevices {ondevicechange: null}ondevicechange: null__proto__: MediaDevicesmimeTypes: MimeTypeArray {0: MimeType, 1: MimeType, 2: MimeType, 3: MimeType, 4: MimeType, 5: MimeType, application/pdf: MimeType, application/x-google-chrome-pdf: MimeType, application/x-nacl: MimeType, application/x-pnacl: MimeType, application/x-ppapi-vysor: MimeType, …}onLine: truepermissions: Permissions {}platform: "MacIntel"plugins: PluginArray {0: Plugin, 1: Plugin, 2: Plugin, 3: Plugin, Chrome PDF Plugin: Plugin, Chrome PDF Viewer: Plugin, Native Client: Plugin, Widevine Content Decryption Module: Plugin, length: 4}presentation: Presentation {defaultRequest: null, receiver: null}product: "Gecko"productSub: "20030107"serviceWorker: ServiceWorkerContainer {controller: null, ready: Promise, oncontrollerchange: null, onmessage: null}controller: nulloncontrollerchange: nullonmessage: nullready: Promise {<pending>}__proto__: ServiceWorkerContainerstorage: StorageManager {}__proto__: StorageManagerusb: USB {onconnect: null, ondisconnect: null}onconnect: nullondisconnect: null__proto__: USBuserAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36"vendor: "Google 
Inc."vendorSub: ""webkitPersistentStorage: DeprecatedStorageQuota {}webkitTemporaryStorage: DeprecatedStorageQuota {}__proto__: Navigator
main.js:26 Current browser: vendor=Google Inc., platform=MacIntel, userAgent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36
main.js:31 current browser support getUserMedia -> allow user speak use microspeaker
是支持的,那就继续试试
Google Chrome Browser
https://www.w3schools.com/browsers/browsers_chrome.asp
“Google Chrome 21
Chrome 21 was released on August 1, 2012.
Chrome 21 is available for Windows, Mac, and Linux.
New features:
* Support for Apple’s Retina display
* Support for the getUserMedia JavaScript API (allows Web applications to access the user’s webcam and microphone)”
Chrome 21就支持getUserMedia了。
再去试试代码:
/**
 * Request microphone access and attach the resulting MediaStream to the
 * #inputAudio element.
 *
 * The stream is attached through HTMLMediaElement.srcObject when the element
 * has that property; URL.createObjectURL(stream) is kept only as a fallback
 * because Chrome deprecated it for media streams.
 *
 * @returns {Promise<void>} settles after the stream has been attached or the
 *   failure has been logged; it never rejects.
 */
function testAudioInput(){
  console.log("testAudioInput");
  // Use the native element: src/srcObject are DOM element properties, and
  // assigning them on a jQuery wrapper object does not reach the element.
  var inputAudio = document.getElementById("inputAudio");
  if (!inputAudio) {
    console.error("testAudioInput: #inputAudio element not found");
    return Promise.resolve();
  }

  var handleSuccess = function(stream) {
    console.log("handleSuccess: stream=%o", stream);
    if ("srcObject" in inputAudio) {
      inputAudio.srcObject = stream;
      console.log("inputAudio.srcObject=%o", inputAudio.srcObject);
      return;
    }
    // Fallback for browsers without srcObject support.
    if (window.URL) {
      inputAudio.src = window.URL.createObjectURL(stream);
    } else {
      inputAudio.src = stream;
    }
    console.log("inputAudio.src=%s", inputAudio.src);
  };

  // Permission denial or a missing device rejects the promise; log it
  // instead of leaving an unhandled rejection.
  var handleError = function(error) {
    console.error("getUserMedia failed: %o", error);
  };

  return navigator.mediaDevices
    .getUserMedia({ audio: true, video: false })
    .then(handleSuccess)
    .catch(handleError);
}
// Request microphone access and feed the stream to #inputAudio.
testAudioInput();
效果
handleSuccess: stream=MediaStream {id: "OoMEcSIYN6Ka7oznHuLFpbi5FMwm26JmxUkx", active: true, onaddtrack: null, onremovetrack: null, onactive: null, …}active: trueid: "OoMEcSIYN6Ka7oznHuLFpbi5FMwm26JmxUkx"onactive: nullonaddtrack: nulloninactive: nullonremovetrack: null__proto__: MediaStream
main.js:48 [Deprecation] URL.createObjectURL with media streams is deprecated and will be removed in M68, around July 2018. Please use HTMLMediaElement.srcObject instead. See https://www.chromestatus.com/features/5618491470118912 for more details.
handleSuccess @ main.js:48
Promise.then (async)
testAudioInput @ main.js:58
(anonymous) @ main.js:61
fire @ jquery-1.11.1.js:3119
fireWith @ jquery-1.11.1.js:3231
ready @ jquery-1.11.1.js:3443
completed @ jquery-1.11.1.js:3474
main.js:53 inputAudio.src=blob:null/681b3dd0-961d-4431-895c-92f061618ed9
所以先去解决:
且对于HTMLMediaElement.srcObject来说,浏览器支持度还是不错的:
https://developer.mozilla.org/en-US/docs/Web/API/HTMLMediaElement/srcObject
继续参考:
https://developers.google.com/web/fundamentals/media/recording-audio/
去试试getUserMedia的AudioContext:
【未解决】getUserMedia后用AudioContext去实现获取麦克风录音数据
暂时没搞定如何通过AudioContext去录音,看到别人提到了MediaRecorder,感觉更像是我们要的,所以去试试:
【已解决】getUserMedia后用MediaRecorder去获取麦克风录音数据
然后接着去优化一下:
如何保存数据供本地下载
如何把blob编码为pcm,wav,amr,mp3等其他格式
试试其他audio的参数
MediaRecorder() – Web APIs | MDN
试试
audioBitsPerSecond : 128000,
继续参考demo
https://github.com/mozdevs/MediaRecorder-examples
看看是否有其他好用的东西借鉴过来
看起来很不错啊,至少封装了接口,且更好用了。
不过参考:
https://medium.com/@bryanjenningz/how-to-record-and-play-audio-in-javascript-faa1b2b3e49b
还是应该
在dataavailable只是保存数据
而在stop时才去后续处理,比如保存数据到audio或文件
所以去优化为:
/**
 * Record audio from a media stream with MediaRecorder.
 *
 * Chunks delivered by 'dataavailable' are only collected; the collected
 * chunks are handed to playRecordedAudio() when the recorder fires 'stop'.
 * Recording starts immediately and is stopped by clicking #stopSpeak.
 *
 * @param {MediaStream} mediaStream - stream passed to the MediaRecorder constructor.
 */
function testMediaRecorder(mediaStream){
  console.log("testMediaRecorder: mediaStream=%o", mediaStream);

  const options = {
    mimeType: 'audio/webm',
    audioBitsPerSecond : 128000, //128Kbit/s
  };
  if (MediaRecorder.isTypeSupported(options.mimeType)) {
    console.log("support options=%o", options);
  } else {
    console.log(options.mimeType + ' is not Supported');
    // The MediaRecorder constructor throws for an unsupported mimeType, so
    // drop it and let the browser fall back to its default container.
    delete options.mimeType;
  }

  const recordedBlobChunks = [];
  const mediaRecorder = new MediaRecorder(mediaStream, options);
  console.log("mediaRecorder=%o", mediaRecorder);

  mediaRecorder.addEventListener('dataavailable', function(e) {
    console.log("dataavailable: e.data.size=%d, e.data=%o", e.data.size, e.data);
    // Empty chunks carry no audio; skip them.
    if (e.data.size > 0) {
      recordedBlobChunks.push(e.data);
      console.log("recordedBlobChunks=%o", recordedBlobChunks);
    }
  });

  $( "#stopSpeak" ).on( "click", function() {
    console.log("#stopSpeak clicked");
    // stop() throws InvalidStateError on an inactive recorder, e.g. when the
    // button is clicked a second time.
    if (mediaRecorder.state !== "inactive") {
      mediaRecorder.stop();
    }
  });

  // Post-processing happens once, after the final chunk has been delivered.
  mediaRecorder.addEventListener('stop', function() {
    console.log("mediaRecorder stoped");
    playRecordedAudio(recordedBlobChunks);
  });

  console.log("before start: mediaRecorder.state=%s", mediaRecorder.state);
  mediaRecorder.start();
  console.log("after start: mediaRecorder.state=%s", mediaRecorder.state);
}
/**
 * Merge recorded chunks into a single Blob and play it in #inputAudio.
 *
 * @param {Blob[]} recordedBlobChunks - chunks collected from the recorder's
 *   'dataavailable' events.
 */
function playRecordedAudio(recordedBlobChunks){
  console.log("playRecordedAudio: recordedBlobChunks=%o", recordedBlobChunks);

  // Native element rather than a jQuery wrapper: src and play() are members
  // of the DOM element.
  var inputAudio = document.getElementById('inputAudio');
  console.log("inputAudio=%o", inputAudio);
  if (!inputAudio) {
    console.error("playRecordedAudio: #inputAudio element not found");
    return;
  }

  // Carry the first chunk's MIME type over so the merged Blob is not typeless.
  var blobOptions = {};
  if (recordedBlobChunks.length > 0 && recordedBlobChunks[0].type) {
    blobOptions.type = recordedBlobChunks[0].type;
  }
  var allBlob = new Blob(recordedBlobChunks, blobOptions);
  console.log("allBlob=%o", allBlob);

  inputAudio.src = URL.createObjectURL(allBlob);
  console.log("inputAudio.src=%o", inputAudio.src);

  // play() returns a promise in current browsers and may reject (for example
  // when playback is blocked); older implementations return undefined.
  var playResult = inputAudio.play();
  if (playResult && typeof playResult.catch === "function") {
    playResult.catch(function(error) {
      console.error("inputAudio.play() failed: %o", error);
    });
  }
}
这个是demo:
WebRTC Audio Recording using MediaStreamRecorder
MediaStreamRecorder/audio-recorder.html at master · streamproc/MediaStreamRecorder
暂时想要去自己用上面代码自己处理保存算了
去弄:
【已解决】js中保存MediaRecorder录音的blob数据到文件中并下载
再去搞清楚:
【已解决】js中MediaRecorder录音支持哪些音频格式
而如果想要支持其他格式,则可以去用MediaStreamRecorder
【已解决】js的MediaRecorder的浏览器支持情况和相关polyfill
结论是:
最好还是用MediaStreamRecorder,以便于此处基于MediaRecorder的代码可以在Chrome,Safari等浏览器运行。
【未解决】换用MediaStreamRecorder实现麦克风录音且支持多个浏览器和wav格式
不过看:
https://github.com/streamproc/MediaStreamRecorder
提示说还有另外一个类似的项目:
支持浏览器也很多
demo:
RecordRTC | WebRTC Audio+Video+Screen Recording
有机会再去试试
如果实在不行,可以考虑试试格式转换:
而具体要转换为别的什么格式,则需要去参考:
找到:
百度
http://ai.baidu.com/docs#/ASR-API/top
pcm(不压缩)
wav(不压缩,pcm编码)
amr(压缩格式)
微软:
https://azure.microsoft.com/zh-cn/services/cognitive-services/speech-to-text/
-》
Get started with the Microsoft Speech Recognition API by using REST | Microsoft Docs
“* Content-type: The Content-type field describes the format and codec of the audio stream. Currently, only WAV file and PCM Mono 16000 encoding is supported. The Content-type value for this format is audio/wav; codec=audio/pcm; samplerate=16000.”
->
wav
pcm