184
人物
長文本合成Demo__語音合成(TTS)_智能語音交互-阿里雲
TTS 長文本合成Demo
長文本合成Demo用於將超過200字的長文本合成為一條語音結果。本Demo基於Java SDK實現,用戶需自行配置Java SDK的調用環境。
SDK 調用:
package com.alibaba.idst.nls.demo;
import com.alibaba.idst.nls.NlsClient;
import com.alibaba.idst.nls.NlsFuture;
import com.alibaba.idst.nls.event.NlsEvent;
import com.alibaba.idst.nls.event.NlsListener;
import com.alibaba.idst.nls.protocol.NlsRequest;
import com.alibaba.idst.nls.protocol.NlsRequestASR;
import com.alibaba.idst.nls.protocol.NlsRequestProto;
import com.alibaba.idst.nls.protocol.NlsResponse;
import com.alibaba.idst.nls.utils.PcmToWav;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.Arrays;
import java.util.UUID;
/**
* Created by songsong.sss on 16/12/12.
*/
public class LongTtsDemo implements NlsListener {
static Logger logger = LoggerFactory.getLogger(LongTtsDemo.class);
private NlsClient client = new NlsClient();
public String appKey = null;
public String auth_Id = null;
public String auth_Secret = null;
public String tts_text ;
private String fileName = UUID.randomUUID().toString();
public LongTtsDemo() {
}
public void shutDown() {
logger.info("close NLS client");
client.close();
logger.info("demo done");
}
public void start() {
logger.info("init Nls client...");
client.init();
tts_text = "百草堂與三味書屋 魯迅 n" +
"我家的後麵有一個很大的園,相傳叫作百草園。現在是早已並屋子一起賣給朱文公的子孫了,連那最末次的相見也已經隔了七八年,其中似乎確鑿隻有一些野草;但那時卻是我的樂園。n" +
"不必說碧綠的菜畦,光滑的石井欄,高大的皂莢樹,紫紅的桑葚;也不必說鳴蟬在樹葉裏長吟,肥胖的黃蜂伏在菜花上,輕捷的叫天子(雲雀)忽然從草間直竄向雲霄裏去了。n"+
"單是周圍的短短的泥牆根一帶,就有無限趣味。油蛉在這裏低唱,蟋蟀們在這裏彈琴。翻開斷磚來,有時會遇見蜈蚣;還有斑蝥,倘若用手指按住它的脊梁,便會啪的一聲,n"+
"從後竅噴出一陣煙霧。何首烏藤和木蓮藤纏絡著,木蓮有蓮房一般的果實,何首烏有臃腫的根。有人說,何首烏根是有像人形的,吃了便可以成仙,我於是常常拔它起來,牽連不斷地拔起來,n" +
"也曾因此弄壞了泥牆,卻從來沒有見過有一塊根像人樣。如果不怕刺,還可以摘到覆盆子,像小珊瑚珠攢成的小球,又酸又甜,色味都比桑葚要好得遠。";
}
public void sayIt() throws Exception {
int ttsTextLength = tts_text.length();
String[] longTexts;
int i = 0;
boolean isHead = false; //標識是否是第一個頭文件
String tts_part_text;
File file = new File(fileName+".pcm");
if (!file.exists()) {
try {
file.createNewFile();
} catch (Exception e) {
e.printStackTrace();
}
}
FileOutputStream outputStream = new FileOutputStream(file, true);
longTexts = processLongText(tts_text);
//處理文本,文本長度以50為限,截取為多個文件.
while (ttsTextLength > 0) {
tts_part_text = "";
if (ttsTextLength > 50) {
if (i == 0) {
isHead = true;
} else {
isHead = false;
}
for (; i < longTexts.length; i++) {
tts_part_text = tts_part_text + longTexts[i];
if (i < longTexts.length - 1 && tts_part_text.length() + longTexts[i + 1].length() >= 50) {
i = i + 1;
break;
}
}
} else {
if (i == 0) {
isHead = true;
}
for (; i < longTexts.length; i++) {
tts_part_text = tts_part_text + longTexts[i];
}
}
NlsRequest req = new NlsRequest();
req.setApp_key("nls-service");
req.setTts_req(tts_part_text, "16000");
req.setTtsEncodeType("wav");
req.setTtsVoice("xiaoyun");//男聲:xiaogang
req.setTtsVolume(50);
req.setTtsBackgroundMusic(1, 0);
req.authorize(auth_Id, auth_Secret);
NlsFuture future = client.createNlsFuture(req, this);
int total_len = 0;
byte[] data;
while ((data = future.read()) != null) {
if (data.length == 8044 ) {
// 去掉wav頭,同時將多條wav轉成一條pcm
logger.debug("data length:{} , and head is:{}", (data.length - 44), isHead ? "true" : "false");
outputStream.write(data, 44, data.length - 44);
} else {
outputStream.write(data, 0, data.length);
}
total_len += data.length;
}
logger.info("tts audio file size is :" + total_len);
future.await(10000);
ttsTextLength = ttsTextLength - tts_part_text.length();
}
outputStream.close();
//將pcm轉為wav,可以直接播放. 格式為:16kHz采樣率,16bit,單聲道
PcmToWav.copyWaveFile(fileName+".pcm",fileName+".wav");
logger.debug("close the wav file!");
}
@Override
public void onMessageReceived(NlsEvent e) {
NlsResponse response = e.getResponse();
String result = "";
if (response.getDs_ret() != null) {
result = "get ds result: " + response.getDs_ret();
}
if (response.getAsr_ret() != null) {
result += "nget asr result: " + response.getAsr_ret();
}
if (response.getTts_ret() != null) {
result += "nget tts result: " + response.getTts_ret();
}
if (response.getGds_ret() != null) {
result += "nget gds result: " + response.getGds_ret();
}
if (!result.isEmpty()) {
logger.info(result);
} else if (response.jsonResults != null) {
logger.info(response.jsonResults.toString());
} else {
logger.info("get an acknowledge package from server.");
}
}
@Override
public void onOperationFailed(NlsEvent e) {
logger.error("Error message is: {}, Error code is: {}", e.getErrorMessage(), Integer.valueOf(e.getResponse().getStatus_code()));
}
//切分長文本
public static String[] processLongText(String text) {
text = text.replaceAll("、", "、|");
text = text.replaceAll(",", ",|");
text = text.replaceAll("。", "。|");
text = text.replaceAll(";", ";|");
text = text.replaceAll("?", "?|");
text = text.replaceAll("!", "!|");
text = text.replaceAll(",", ",|");
text = text.replaceAll(";", ";|");
text = text.replaceAll("\?", "?|");
text = text.replaceAll("!", "!|");
String[] texts = text.split("\|");
return texts;
}
@Override
public void onChannelClosed(NlsEvent e) {
logger.info("on websocket closed.");
}
/**
* @param args
*/
public static void main(String[] args) throws Exception {
LongTtsDemo lun = new LongTtsDemo();
if (args.length < 4) {
logger.info("NlsDemo <app-key> <Id> <Secret>");
System.exit(-1);
}
lun.appKey = args[0];
lun.auth_Id = args[1];
lun.auth_Secret = args[2];
lun.start();
lun.sayIt();
lun.shutDown();
}
}
pcm 轉 wav 工具類:
package com.alibaba.idst.nls.utils;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
/**
 * Utility that turns a raw PCM file into a WAV file by writing a 44-byte RIFF/WAVE header
 * (PCM format, 16000 Hz, 16 bits per sample, one channel) followed by the PCM data.
 *
 * Created by songsong.sss on 2016/12/12.
 */
public class PcmToWav {
    /** Sample rate written into the header, in Hz. */
    private static final int SAMPLE_RATE = 16000;
    /** Bits per sample written into the header. */
    private static final int RECORDER_BPP = 16;
    /** Size of the copy buffer in bytes. */
    private static final int BUFFER_SIZE = 640;

    /**
     * Copies the PCM data of {@code inFilename} into {@code outFilename}, preceded by a WAV
     * header describing it. I/O errors are reported with {@code printStackTrace()} and not
     * propagated; both streams are closed in every case.
     *
     * @param inFilename  path of the raw PCM input file
     * @param outFilename path of the WAV file to create
     */
    public static void copyWaveFile(String inFilename, String outFilename) {
        int channels = 1;
        long byteRate = RECORDER_BPP * SAMPLE_RATE * channels / 8;
        byte[] buffer = new byte[BUFFER_SIZE];
        FileInputStream in = null;
        FileOutputStream out = null;
        try {
            in = new FileInputStream(inFilename);
            out = new FileOutputStream(outFilename);
            long totalAudioLen = in.getChannel().size();
            // RIFF chunk size: everything after the first 8 bytes of the 44-byte header.
            long totalDataLen = totalAudioLen + 36;
            writeWaveFileHeader(out, totalAudioLen, totalDataLen,
                    SAMPLE_RATE, channels, byteRate);
            int read;
            while ((read = in.read(buffer)) != -1) {
                // Write only the bytes actually read; the last read is usually a partial buffer.
                out.write(buffer, 0, read);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(in);
            closeQuietly(out);
        }
    }

    /** Closes {@code stream} if it is not null, reporting (not propagating) any failure. */
    private static void closeQuietly(java.io.Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes the 44-byte RIFF/WAVE header. All multi-byte fields are little-endian.
     *
     * @param out            stream to write the header to
     * @param totalAudioLen  size of the PCM data in bytes ('data' chunk size)
     * @param totalDataLen   RIFF chunk size
     * @param longSampleRate sample rate in Hz
     * @param channels       number of channels
     * @param byteRate       bytes of audio per second
     */
    private static void writeWaveFileHeader(
            FileOutputStream out, long totalAudioLen,
            long totalDataLen, long longSampleRate, int channels,
            long byteRate) throws IOException {
        byte[] header = new byte[44];
        header[0] = 'R'; // RIFF/WAVE header
        header[1] = 'I';
        header[2] = 'F';
        header[3] = 'F';
        header[4] = (byte) (totalDataLen & 0xff);
        header[5] = (byte) ((totalDataLen >> 8) & 0xff);
        header[6] = (byte) ((totalDataLen >> 16) & 0xff);
        header[7] = (byte) ((totalDataLen >> 24) & 0xff);
        header[8] = 'W';
        header[9] = 'A';
        header[10] = 'V';
        header[11] = 'E';
        header[12] = 'f'; // 'fmt ' chunk
        header[13] = 'm';
        header[14] = 't';
        header[15] = ' ';
        header[16] = 16; // 4 bytes: size of 'fmt ' chunk
        header[17] = 0;
        header[18] = 0;
        header[19] = 0;
        header[20] = 1; // format = 1
        header[21] = 0;
        header[22] = (byte) channels;
        header[23] = 0;
        header[24] = (byte) (longSampleRate & 0xff);
        header[25] = (byte) ((longSampleRate >> 8) & 0xff);
        header[26] = (byte) ((longSampleRate >> 16) & 0xff);
        header[27] = (byte) ((longSampleRate >> 24) & 0xff);
        header[28] = (byte) (byteRate & 0xff);
        header[29] = (byte) ((byteRate >> 8) & 0xff);
        header[30] = (byte) ((byteRate >> 16) & 0xff);
        header[31] = (byte) ((byteRate >> 24) & 0xff);
        header[32] = (byte) (channels * RECORDER_BPP / 8); // block align
        header[33] = 0;
        header[34] = RECORDER_BPP; // bits per sample
        header[35] = 0;
        header[36] = 'd';
        header[37] = 'a';
        header[38] = 't';
        header[39] = 'a';
        header[40] = (byte) (totalAudioLen & 0xff);
        header[41] = (byte) ((totalAudioLen >> 8) & 0xff);
        header[42] = (byte) ((totalAudioLen >> 16) & 0xff);
        header[43] = (byte) ((totalAudioLen >> 24) & 0xff);
        out.write(header, 0, 44);
    }
}
最後更新:2016-12-13 11:16:18
上一篇:
批量合成工具__語音合成(TTS)_智能語音交互-阿裏雲
下一篇:
協議描述語言__語義表示協議_自然語言理解(NLU)_智能語音交互-阿裏雲
常見錯誤說明__附錄_大數據計算服務-阿裏雲
發送短信接口__API使用手冊_短信服務-阿裏雲
接口文檔__Android_安全組件教程_移動安全-阿裏雲
運營商錯誤碼(聯通)__常見問題_短信服務-阿裏雲
設置短信模板__使用手冊_短信服務-阿裏雲
OSS 權限問題及排查__常見錯誤及排除_最佳實踐_對象存儲 OSS-阿裏雲
消息通知__操作指南_批量計算-阿裏雲
設備端快速接入(MQTT)__快速開始_阿裏雲物聯網套件-阿裏雲
查詢API調用流量數據__API管理相關接口_API_API 網關-阿裏雲
使用STS訪問__JavaScript-SDK_SDK 參考_對象存儲 OSS-阿裏雲