集成思必驰TTS功能

Signed-off-by: chenfufeng <chenfufeng@zhidaoauto.com>
This commit is contained in:
chenfufeng
2021-09-22 15:56:11 +08:00
parent afd7df9d31
commit 05e8d099b9
9 changed files with 195 additions and 22 deletions

Binary file not shown.

Binary file not shown.

View File

@@ -3,17 +3,27 @@ package com.mogo.tts.pad;
import android.content.Context;
import android.content.Intent;
import android.os.Build;
import android.os.Environment;
import android.provider.Settings;
import android.text.TextUtils;
import com.aispeech.AIEchoConfig;
import com.aispeech.AIError;
import com.aispeech.DUILiteConfig;
import com.aispeech.DUILiteSDK;
import com.aispeech.common.AIConstant;
import com.aispeech.export.config.AILocalTTSConfig;
import com.aispeech.export.engines2.AILocalTTSEngine;
import com.aispeech.export.intent.AILocalTTSIntent;
import com.aispeech.export.listeners.AILocalTTSListener;
import com.alibaba.android.arouter.facade.annotation.Route;
import com.mogo.cloud.commons.BuildConfig;
import com.mogo.tts.base.IMogoTTS;
import com.mogo.tts.base.IMogoTTSCallback;
import com.mogo.tts.base.MogoTTSConstants;
import com.mogo.tts.base.PreemptType;
import com.mogo.utils.AppUtils;
import com.mogo.utils.logger.Logger;
import com.mogo.utils.permissions.PermissionsDialogUtils;
import com.zhidao.auto.platform.voice.VoiceClient;
import com.zhidao.voicesdk.MogoVoiceManager;
import com.zhidao.voicesdk.MogoVoiceManagerImpl;
@@ -40,8 +50,8 @@ class PadTTS implements IMogoTTS, VoiceClient.VoiceCmdCallBack, OnTtsListener {
private static final String TAG = "PadTTS";
private String mLastQAndASpeakText;
private boolean mHasFlush = false;
private boolean mInitReady = false;
private boolean mHasFlush = true;
private boolean mInitReady = true;
private Context mContext;
public synchronized void release() {
@@ -57,6 +67,10 @@ class PadTTS implements IMogoTTS, VoiceClient.VoiceCmdCallBack, OnTtsListener {
}
mQAndAMap.clear();
mVoiceClient.release();
if (mEngine != null) {
mEngine.destroy();
}
mHasAuth = false;
mSpeakVoiceMap.clear();
mCacheUnWakeupCommands.clear();
mContext = null;
@@ -73,12 +87,117 @@ class PadTTS implements IMogoTTS, VoiceClient.VoiceCmdCallBack, OnTtsListener {
// String -> String[] command cache; cleared in release().
// NOTE(review): presumably holds commands usable without a wake-up word - confirm against callers.
private Map< String, String[] > mCacheUnWakeupCommands = new ConcurrentHashMap<>();
// Speaker (voice) resource file name; the only entry of mBackResBinArray below.
private static final String TTS_BACK_RES_ZHILING = "zhilingf_common_back_ce_local.v2.1.0.bin";
// Recorder type handed to DUILiteConfig.setAudioRecorderType() in initTtsConfig().
private int audioRecorderType = DUILiteConfig.TYPE_COMMON_MIC;
// Echo module resource: passed to AIEchoConfig.setAecResource() when the recorder type is TYPE_COMMON_ECHO.
private static final String ECHO_RES = "echo/sspe_aec_ch2_mic1_ref1_asr_v2.0.0.95.bin";
// Local TTS module resources: dictionary and front-end files passed to AILocalTTSConfig in initTtsEngine().
private static final String TTS_DICT_RES = "v2.1.31_aitts_sent_dict_local.db";
private static final String TTS_FRONT_RES = "v2.1.31_local_front.bin";
// Local TTS engine; created in initTtsEngine() and destroyed in release().
private AILocalTTSEngine mEngine;
// Synthesis parameters built in initTtsEngine() and passed to mEngine.speak()/synthesize().
private AILocalTTSIntent mAILocalTTSIntent;
// Speaker resources registered through AILocalTTSConfig.addSpeakerResource().
private String[] mBackResBinArray = new String[]{TTS_BACK_RES_ZHILING};
// Standalone voice playback.
// Set to true when DUILiteSDK authorization succeeds; reset to false in release().
private boolean mHasAuth;
/**
 * Refreshes {@code mHasFlush} from {@code isVoiceServiceReady(mContext)}.
 *
 * <p>Once the flag is true it is left alone, so the readiness check is only
 * repeated while the flag is still false.
 */
private void initFlushStatus() {
    if ( mHasFlush ) {
        // Already flagged as ready; nothing to re-check.
        return;
    }
    mHasFlush = isVoiceServiceReady( mContext );
}
/**
 * Configures and authorizes the DUILite SDK, then starts the local TTS engine.
 *
 * <p>Builds a {@link DUILiteConfig} with the product credentials, device
 * parameters, authorization-profile directory and recorder type, and passes
 * it to {@code DUILiteSDK.init}. When authorization succeeds, {@code mHasAuth}
 * is set and {@link #initTtsEngine()} is called; when it fails,
 * {@code mHasAuth} is cleared and the error is logged.
 */
private void initTtsConfig() {
    Logger.d(TAG, "initTtsConfig");

    // Product authorization requires apiKey, productId, productKey and productSecret.
    // NOTE(review): these credentials are hard-coded in source control; consider
    // loading them from build configuration or secure storage instead.
    DUILiteConfig config = new DUILiteConfig(
            "1009e96c95da655dcad8d6eb6145bbdd",
            "278586132",
            "1fe5930844b488a8d32d9ef7717be7dc",
            "f601ecc407986b548ac8ab2a9144162e");
    // Authorization connection timeout in ms (the original note gives 5000 ms as the default).
    config.setAuthTimeout(5000);
    // NOTE(review): every installation reports the same fixed device name/id - confirm this is intended.
    config.setExtraParameter("DEVICE_NAME", "fea815f374af8");
    config.setExtraParameter("DEVICE_ID", "fea815f374af8");

    // Custom directory for the authorization profile. The original note says the
    // directory must exist beforehand, so create it here when it is missing.
    final String profileDirPath = "/sdcard/speech";
    java.io.File profileDir = new java.io.File(profileDirPath);
    if (!profileDir.isDirectory() && !profileDir.mkdirs()) {
        Logger.d(TAG, "failed to create auth profile dir " + profileDirPath);
    }
    config.setDeviceProfileDirPath(profileDirPath);
    // Optional settings left unused by the original: setThreadAffinity (pin work to
    // one core) and setOfflineProfileName (offline auth file under assets).

    // SDK recording mode. The original note lists single mic, single-mic echo,
    // dual mic, linear 4-mic, circular 4-mic and circular 6-mic as options.
    config.setAudioRecorderType(audioRecorderType);
    if (config.getAudioRecorderType() == DUILiteConfig.TYPE_COMMON_ECHO) {
        AIEchoConfig aiEchoConfig = new AIEchoConfig();
        aiEchoConfig.setAecResource(ECHO_RES); // AEC resource file for the echo module
        aiEchoConfig.setChannels(2);           // total number of audio channels
        aiEchoConfig.setMicNumber(1);          // number of real microphones
        // Per the original note: 1 (default) = left channel is the recording and right
        // channel is the playback reference; 2 swaps the two channels.
        aiEchoConfig.setRecChannel(1);
        // Directory for the raw AEC input and the post-AEC audio files.
        aiEchoConfig.setSavedDirPath("/sdcard/aispeech/aecPcmFile/");
        config.setEchoConfig(aiEchoConfig);
    }

    if (BuildConfig.DEBUG) {
        config.openLog();
        // Log level for the SDK.
        DUILiteSDK.setDebugMode(3);
    }

    Logger.d(TAG, "DUILite SDK is isAuthorized " + DUILiteSDK.isAuthorized(mContext));
    DUILiteSDK.init(mContext,
            config,
            new DUILiteSDK.InitListener() {
                @Override
                public void success() {
                    Logger.d(TAG, "授权成功!");
                    if (mContext == null) {
                        // release() nulls mContext. If the result arrives afterwards,
                        // creating an engine here would leave it without an owner to destroy it.
                        Logger.d(TAG, "authorization finished after release, skip engine init");
                        return;
                    }
                    mHasAuth = true;
                    initTtsEngine();
                }

                @Override
                public void error(final String errorCode, final String errorInfo) {
                    // Make sure a failed authorization never leaves the flag set.
                    mHasAuth = false;
                    Logger.d(TAG, "授权失败\n\nErrorCode" + errorCode + "\n\nErrorInfo" + errorInfo);
                }
            });
}
/**
 * (Re)creates the local TTS engine and the synthesis parameters used by it.
 *
 * <p>Any existing engine is destroyed first. The synthesis intent is fully
 * built and stored before the new engine is assigned to {@code mEngine}, so
 * code that checks {@code mEngine != null} before speaking does not observe
 * an engine without its intent.
 */
private void initTtsEngine() {
    Logger.d(TAG, "initTtsEngine");
    if (mEngine != null) {
        mEngine.destroy();
        // Drop the reference so a destroyed engine is not used while the new one is built.
        mEngine = null;
    }

    AILocalTTSConfig config = new AILocalTTSConfig();
    // Synthesis dictionary under assets (Mandarin). The original note says an external
    // path including the file name may be used instead, after copying the file manually.
    config.setDictResource(TTS_DICT_RES);
    // Front-end synthesis resource under assets (Mandarin). The original notes mention
    // Cantonese and Sichuanese front-end resources, and an optional user dictionary
    // (setUserDictResource) for urgent pronunciation fixes; none are used here.
    config.setFrontBinResource(TTS_FRONT_RES);
    // Turn caching off (the original note says the SDK default is true).
    config.setUseCache(false);
    // Back-end speaker resources; a one-element array is enough for a single voice.
    config.addSpeakerResource(mBackResBinArray);

    AILocalTTSIntent intent = new AILocalTTSIntent();
    // Speech rate. The original note gives the range as "0.52.0" - presumably 0.5 to 2.0; confirm in the SDK docs.
    intent.setSpeed(0.85f);
    // Whether to use SSML synthesis syntax (the original note says the default is false).
    intent.setUseSSML(false);
    // Volume. The original note gives the range as "1500" - presumably 1 to 500; confirm in the SDK docs.
    intent.setVolume(100);
    // Save synthesized audio as wav.
    // NOTE(review): the path is computed once here, so every utterance uses the same
    // file name, and nothing in this method creates the "/tts/" directory.
    intent.setSaveAudioFilePath(Environment.getExternalStorageDirectory() + "/tts/"
            + System.currentTimeMillis() + ".wav");
    mAILocalTTSIntent = intent;

    AILocalTTSEngine engine = AILocalTTSEngine.createInstance();
    engine.init(config, new AILocalTTSListenerImpl());
    // Publish the engine last, after both the intent and the init call are in place.
    mEngine = engine;
}
/**
* 初始化
*/
@@ -190,15 +309,16 @@ class PadTTS implements IMogoTTS, VoiceClient.VoiceCmdCallBack, OnTtsListener {
* @param text
*/
public void speakTTSVoice( String text, IMogoTTSCallback callBack ) {
try {
initFlushStatus();
if ( mHasFlush ) {
mSpeakVoiceMap.put( text, callBack );
mVoiceClient.speakDefault( text );
}
} catch ( Exception e ) {
e.printStackTrace();
}
speakTTSVoice(text);
// try {
// initFlushStatus();
// if ( mHasFlush ) {
// mSpeakVoiceMap.put( text, callBack );
// mVoiceClient.speakDefault( text );
// }
// } catch ( Exception e ) {
// e.printStackTrace();
// }
}
/**
@@ -207,13 +327,12 @@ class PadTTS implements IMogoTTS, VoiceClient.VoiceCmdCallBack, OnTtsListener {
* @param text
*/
public void speakTTSVoice( String text ) {
try {
initFlushStatus();
if ( mHasFlush ) {
mVoiceClient.speakDefault( text );
}
} catch ( Exception e ) {
e.printStackTrace();
Logger.d( TAG, "speakTTSVoice");
if (mEngine != null && mHasAuth) {
// 合成并播放
mEngine.speak(mAILocalTTSIntent, text, "1024");
// 合成音频不播放同时输出实时pcm音频,音频回调在onSynthesizeDataArrived接口
mEngine.synthesize(mAILocalTTSIntent, text, "1024");
}
}
@@ -245,7 +364,8 @@ class PadTTS implements IMogoTTS, VoiceClient.VoiceCmdCallBack, OnTtsListener {
break;
}
}
mVoiceClient.speakTypeText( text, preemptType );
speakTTSVoice(text);
// mVoiceClient.speakTypeText( text, preemptType );
}
} catch ( Exception e ) {
e.printStackTrace();
@@ -261,7 +381,8 @@ class PadTTS implements IMogoTTS, VoiceClient.VoiceCmdCallBack, OnTtsListener {
initFlushStatus();
if ( mHasFlush ) {
mQAndAMap.put( tts, callBack );
mVoiceClient.speakTtsAndRegistCmd( tts );
speakTTSVoice(tts);
// mVoiceClient.speakTtsAndRegistCmd( tts );
}
}
@@ -276,7 +397,8 @@ class PadTTS implements IMogoTTS, VoiceClient.VoiceCmdCallBack, OnTtsListener {
initFlushStatus();
if ( mHasFlush ) {
mQAndAMap.put( tts, callBack );
mVoiceClient.speakTtsAndRegistCmd( tts, okCmds, cancelCmds );
speakTTSVoice(tts);
// mVoiceClient.speakTtsAndRegistCmd( tts, okCmds, cancelCmds );
}
}
@@ -471,6 +593,7 @@ class PadTTS implements IMogoTTS, VoiceClient.VoiceCmdCallBack, OnTtsListener {
mVoiceClient = new VoiceClient( mContext );
mVoiceClient.setCallBack( this );
initFlushStatus();
initTtsConfig();
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) {
// 检查是否有悬浮窗权限
if (Settings.canDrawOverlays(context)) {
@@ -499,4 +622,54 @@ class PadTTS implements IMogoTTS, VoiceClient.VoiceCmdCallBack, OnTtsListener {
Logger.d( TAG, "status = %d", status );
context.sendBroadcast( intent );
}
private static class AILocalTTSListenerImpl implements AILocalTTSListener {
@Override
public void onInit(int status) {
Logger.d(TAG, "初始化完成,返回值:" + status);
if (status == AIConstant.OPT_SUCCESS) {
Logger.d(TAG, "初始化成功!");
} else {
Logger.d(TAG, "初始化失败!code:" + status);
}
}
@Override
public void onError(String utteranceId, AIError error) {
Logger.d(TAG, "检测到错误:" + error.toString());
}
@Override
public void onSynthesizeStart(String utteranceId) {
Logger.d(TAG, "合成开始");
}
@Override
public void onSynthesizeDataArrived(String utteranceId, byte[] audioData) {
//Log.d(Tag, "合成pcm音频数据:" + audioData.length);
//正常合成结束后会收到size大小为0的audioData,即audioData.length == 0。应用层可以根据该标志停止播放
//若合成过程中取消(stop或release),则不会收到该结束标志
}
@Override
public void onSynthesizeFinish(String utteranceId) {
Logger.d(TAG, "合成结束");
}
@Override
public void onSpeechStart(String utteranceId) {
Logger.d(TAG, "开始播放");
}
@Override
public void onSpeechProgress(int currentTime, int totalTime, boolean isRefTextTTSFinished) {
Logger.d(TAG, "当前:" + currentTime + "ms, 总计:" + totalTime + "ms, 可信度:" + isRefTextTTSFinished);
}
@Override
public void onSpeechFinish(String utteranceId) {
Logger.d(TAG, "播放完成");
}
}
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.