184
阿里云
长文本合成Demo__语音合成(TTS)_智能语音交互-阿里云
TTS 长文本合成Demo
长文本合成Demo用于满足用户将超过200字的长文本合成为一条语音结果的需求。本Demo基于Java SDK实现,用户需自行配置Java SDK的调用环境。
SDK 调用:
package com.alibaba.idst.nls.demo;
import com.alibaba.idst.nls.NlsClient;
import com.alibaba.idst.nls.NlsFuture;
import com.alibaba.idst.nls.event.NlsEvent;
import com.alibaba.idst.nls.event.NlsListener;
import com.alibaba.idst.nls.protocol.NlsRequest;
import com.alibaba.idst.nls.protocol.NlsRequestASR;
import com.alibaba.idst.nls.protocol.NlsRequestProto;
import com.alibaba.idst.nls.protocol.NlsResponse;
import com.alibaba.idst.nls.utils.PcmToWav;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.Arrays;
import java.util.UUID;
/**
* Created by songsong.sss on 16/12/12.
*/
public class LongTtsDemo implements NlsListener {
static Logger logger = LoggerFactory.getLogger(LongTtsDemo.class);
private NlsClient client = new NlsClient();
public String appKey = null;
public String auth_Id = null;
public String auth_Secret = null;
public String tts_text ;
private String fileName = UUID.randomUUID().toString();
public LongTtsDemo() {
}
public void shutDown() {
logger.info("close NLS client");
client.close();
logger.info("demo done");
}
public void start() {
logger.info("init Nls client...");
client.init();
tts_text = "百草堂与三味书屋 鲁迅 n" +
"我家的后面有一个很大的园,相传叫作百草园。现在是早已并屋子一起卖给朱文公的子孙了,连那最末次的相见也已经隔了七八年,其中似乎确凿只有一些野草;但那时却是我的乐园。n" +
"不必说碧绿的菜畦,光滑的石井栏,高大的皂荚树,紫红的桑葚;也不必说鸣蝉在树叶里长吟,肥胖的黄蜂伏在菜花上,轻捷的叫天子(云雀)忽然从草间直窜向云霄里去了。n"+
"单是周围的短短的泥墙根一带,就有无限趣味。油蛉在这里低唱,蟋蟀们在这里弹琴。翻开断砖来,有时会遇见蜈蚣;还有斑蝥,倘若用手指按住它的嵴梁,便会啪的一声,n"+
"从后窍喷出一阵烟雾。何首乌藤和木莲藤缠络着,木莲有莲房一般的果实,何首乌有臃肿的根。有人说,何首乌根是有像人形的,吃了便可以成仙,我于是常常拔它起来,牵连不断地拔起来,n" +
"也曾因此弄坏了泥墙,却从来没有见过有一块根像人样。如果不怕刺,还可以摘到覆盆子,像小珊瑚珠攒成的小球,又酸又甜,色味都比桑葚要好得远。";
}
public void sayIt() throws Exception {
int ttsTextLength = tts_text.length();
String[] longTexts;
int i = 0;
boolean isHead = false; //标识是否是第一个头文件
String tts_part_text;
File file = new File(fileName+".pcm");
if (!file.exists()) {
try {
file.createNewFile();
} catch (Exception e) {
e.printStackTrace();
}
}
FileOutputStream outputStream = new FileOutputStream(file, true);
longTexts = processLongText(tts_text);
//处理文本,文本长度以50为限,截取为多个文件.
while (ttsTextLength > 0) {
tts_part_text = "";
if (ttsTextLength > 50) {
if (i == 0) {
isHead = true;
} else {
isHead = false;
}
for (; i < longTexts.length; i++) {
tts_part_text = tts_part_text + longTexts[i];
if (i < longTexts.length - 1 && tts_part_text.length() + longTexts[i + 1].length() >= 50) {
i = i + 1;
break;
}
}
} else {
if (i == 0) {
isHead = true;
}
for (; i < longTexts.length; i++) {
tts_part_text = tts_part_text + longTexts[i];
}
}
NlsRequest req = new NlsRequest();
req.setApp_key("nls-service");
req.setTts_req(tts_part_text, "16000");
req.setTtsEncodeType("wav");
req.setTtsVoice("xiaoyun");//男声:xiaogang
req.setTtsVolume(50);
req.setTtsBackgroundMusic(1, 0);
req.authorize(auth_Id, auth_Secret);
NlsFuture future = client.createNlsFuture(req, this);
int total_len = 0;
byte[] data;
while ((data = future.read()) != null) {
if (data.length == 8044 ) {
// 去掉wav头,同时将多条wav转成一条pcm
logger.debug("data length:{} , and head is:{}", (data.length - 44), isHead ? "true" : "false");
outputStream.write(data, 44, data.length - 44);
} else {
outputStream.write(data, 0, data.length);
}
total_len += data.length;
}
logger.info("tts audio file size is :" + total_len);
future.await(10000);
ttsTextLength = ttsTextLength - tts_part_text.length();
}
outputStream.close();
//将pcm转为wav,可以直接播放. 格式为:16kHz采样率,16bit,单声道
PcmToWav.copyWaveFile(fileName+".pcm",fileName+".wav");
logger.debug("close the wav file!");
}
@Override
public void onMessageReceived(NlsEvent e) {
NlsResponse response = e.getResponse();
String result = "";
if (response.getDs_ret() != null) {
result = "get ds result: " + response.getDs_ret();
}
if (response.getAsr_ret() != null) {
result += "nget asr result: " + response.getAsr_ret();
}
if (response.getTts_ret() != null) {
result += "nget tts result: " + response.getTts_ret();
}
if (response.getGds_ret() != null) {
result += "nget gds result: " + response.getGds_ret();
}
if (!result.isEmpty()) {
logger.info(result);
} else if (response.jsonResults != null) {
logger.info(response.jsonResults.toString());
} else {
logger.info("get an acknowledge package from server.");
}
}
@Override
public void onOperationFailed(NlsEvent e) {
logger.error("Error message is: {}, Error code is: {}", e.getErrorMessage(), Integer.valueOf(e.getResponse().getStatus_code()));
}
//切分长文本
public static String[] processLongText(String text) {
text = text.replaceAll("、", "、|");
text = text.replaceAll(",", ",|");
text = text.replaceAll("。", "。|");
text = text.replaceAll(";", ";|");
text = text.replaceAll("?", "?|");
text = text.replaceAll("!", "!|");
text = text.replaceAll(",", ",|");
text = text.replaceAll(";", ";|");
text = text.replaceAll("\?", "?|");
text = text.replaceAll("!", "!|");
String[] texts = text.split("\|");
return texts;
}
@Override
public void onChannelClosed(NlsEvent e) {
logger.info("on websocket closed.");
}
/**
* @param args
*/
public static void main(String[] args) throws Exception {
LongTtsDemo lun = new LongTtsDemo();
if (args.length < 4) {
logger.info("NlsDemo <app-key> <Id> <Secret>");
System.exit(-1);
}
lun.appKey = args[0];
lun.auth_Id = args[1];
lun.auth_Secret = args[2];
lun.start();
lun.sayIt();
lun.shutDown();
}
}
pcm 转 wav 工具类:
package com.alibaba.idst.nls.utils;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
/**
 * Utility that turns a raw PCM file into a WAV file by prepending a 44-byte RIFF/WAVE header.
 *
 * <p>The header always describes 16000 Hz, 16-bit, single-channel PCM audio.
 *
 * Created by songsong.sss on 2016/12/12.
 */
public class PcmToWav {
    private static final int frequency = 16000;
    private static final int RECORDER_BPP = 16;
    private static final int recBufSize = 640;

    /**
     * Copies the PCM data in {@code inFilename} to {@code outFilename}, preceded by a WAV header.
     *
     * <p>I/O failures are not propagated: the stack trace is printed and the method returns,
     * possibly leaving an incomplete output file.
     *
     * @param inFilename path of the raw PCM input file
     * @param outFilename path of the WAV file to create or overwrite
     */
    public static void copyWaveFile(String inFilename, String outFilename) {
        long longSampleRate = frequency;
        int channels = 1;
        long byteRate = RECORDER_BPP * frequency * channels / 8;
        byte[] data = new byte[recBufSize];
        // try-with-resources closes both streams even when reading or writing fails.
        try (FileInputStream in = new FileInputStream(inFilename);
             FileOutputStream out = new FileOutputStream(outFilename)) {
            long totalAudioLen = in.getChannel().size();
            // RIFF chunk size: the whole file minus the 8-byte "RIFF"+size prefix.
            long totalDataLen = totalAudioLen + 36;
            writeWaveFileHeader(out, totalAudioLen, totalDataLen,
                    longSampleRate, channels, byteRate);
            int read;
            while ((read = in.read(data)) != -1) {
                // Write only what was actually read; the last chunk is usually shorter than
                // the buffer, and writing the full buffer would append stale bytes.
                out.write(data, 0, read);
            }
        } catch (IOException e) {
            // Kept best-effort as before (covers FileNotFoundException as well).
            e.printStackTrace();
        }
    }

    /** Writes the 44-byte canonical PCM WAV header to {@code out}. */
    private static void writeWaveFileHeader(
            FileOutputStream out, long totalAudioLen,
            long totalDataLen, long longSampleRate, int channels,
            long byteRate) throws IOException {
        byte[] header = new byte[44];
        putAscii(header, 0, "RIFF");              // RIFF/WAVE header
        putIntLE(header, 4, totalDataLen);
        putAscii(header, 8, "WAVE");
        putAscii(header, 12, "fmt ");             // 'fmt ' chunk
        putIntLE(header, 16, 16);                 // 4 bytes: size of 'fmt ' chunk
        putShortLE(header, 20, 1);                // format = 1
        putShortLE(header, 22, channels);
        putIntLE(header, 24, longSampleRate);
        putIntLE(header, 28, byteRate);
        putShortLE(header, 32, channels * RECORDER_BPP / 8); // block align
        putShortLE(header, 34, RECORDER_BPP);     // bits per sample
        putAscii(header, 36, "data");
        putIntLE(header, 40, totalAudioLen);
        out.write(header, 0, 44);
    }

    /** Stores the characters of {@code tag} as single bytes starting at {@code offset}. */
    private static void putAscii(byte[] buf, int offset, String tag) {
        for (int i = 0; i < tag.length(); i++) {
            buf[offset + i] = (byte) tag.charAt(i);
        }
    }

    /** Stores the low 32 bits of {@code value} in little-endian order at {@code offset}. */
    private static void putIntLE(byte[] buf, int offset, long value) {
        for (int i = 0; i < 4; i++) {
            buf[offset + i] = (byte) ((value >> (8 * i)) & 0xff);
        }
    }

    /** Stores the low 16 bits of {@code value} in little-endian order at {@code offset}. */
    private static void putShortLE(byte[] buf, int offset, int value) {
        buf[offset] = (byte) (value & 0xff);
        buf[offset + 1] = (byte) ((value >> 8) & 0xff);
    }
}
最后更新:2016-12-13 11:16:18
上一篇:
批量合成工具__语音合成(TTS)_智能语音交互-阿里云
下一篇:
协议描述语言__语义表示协议_自然语言理解(NLU)_智能语音交互-阿里云
常见错误说明__附录_大数据计算服务-阿里云
发送短信接口__API使用手册_短信服务-阿里云
接口文档__Android_安全组件教程_移动安全-阿里云
运营商错误码(联通)__常见问题_短信服务-阿里云
设置短信模板__使用手册_短信服务-阿里云
OSS 权限问题及排查__常见错误及排除_最佳实践_对象存储 OSS-阿里云
消息通知__操作指南_批量计算-阿里云
设备端快速接入(MQTT)__快速开始_阿里云物联网套件-阿里云
查询API调用流量数据__API管理相关接口_API_API 网关-阿里云
使用STS访问__JavaScript-SDK_SDK 参考_对象存储 OSS-阿里云