阅读184 返回首页    go 阿里云


长文本合成Demo__语音合成(TTS)_智能语音交互-阿里云

TTS 长文本合成Demo

长文本合成Demo用于满足这样的需求:用户的文本超过200字,并且需要将其合成为一条语音结果。本Demo基于Java SDK实现,用户需自行配置Java SDK的调用环境。

SDK 调用:
  1. package com.alibaba.idst.nls.demo;
  2. import com.alibaba.idst.nls.NlsClient;
  3. import com.alibaba.idst.nls.NlsFuture;
  4. import com.alibaba.idst.nls.event.NlsEvent;
  5. import com.alibaba.idst.nls.event.NlsListener;
  6. import com.alibaba.idst.nls.protocol.NlsRequest;
  7. import com.alibaba.idst.nls.protocol.NlsRequestASR;
  8. import com.alibaba.idst.nls.protocol.NlsRequestProto;
  9. import com.alibaba.idst.nls.protocol.NlsResponse;
  10. import com.alibaba.idst.nls.utils.PcmToWav;
  11. import org.slf4j.Logger;
  12. import org.slf4j.LoggerFactory;
  13. import java.io.File;
  14. import java.io.FileInputStream;
  15. import java.io.FileOutputStream;
  16. import java.util.Arrays;
  17. import java.util.UUID;
  18. /**
  19. * Created by songsong.sss on 16/12/12.
  20. */
  21. public class LongTtsDemo implements NlsListener {
  22. static Logger logger = LoggerFactory.getLogger(LongTtsDemo.class);
  23. private NlsClient client = new NlsClient();
  24. public String appKey = null;
  25. public String auth_Id = null;
  26. public String auth_Secret = null;
  27. public String tts_text ;
  28. private String fileName = UUID.randomUUID().toString();
  29. public LongTtsDemo() {
  30. }
  31. public void shutDown() {
  32. logger.info("close NLS client");
  33. client.close();
  34. logger.info("demo done");
  35. }
  36. public void start() {
  37. logger.info("init Nls client...");
  38. client.init();
  39. tts_text = "百草堂与三味书屋 鲁迅 n" +
  40. "我家的后面有一个很大的园,相传叫作百草园。现在是早已并屋子一起卖给朱文公的子孙了,连那最末次的相见也已经隔了七八年,其中似乎确凿只有一些野草;但那时却是我的乐园。n" +
  41. "不必说碧绿的菜畦,光滑的石井栏,高大的皂荚树,紫红的桑葚;也不必说鸣蝉在树叶里长吟,肥胖的黄蜂伏在菜花上,轻捷的叫天子(云雀)忽然从草间直窜向云霄里去了。n"+
  42. "单是周围的短短的泥墙根一带,就有无限趣味。油蛉在这里低唱,蟋蟀们在这里弹琴。翻开断砖来,有时会遇见蜈蚣;还有斑蝥,倘若用手指按住它的嵴梁,便会啪的一声,n"+
  43. "从后窍喷出一阵烟雾。何首乌藤和木莲藤缠络着,木莲有莲房一般的果实,何首乌有臃肿的根。有人说,何首乌根是有像人形的,吃了便可以成仙,我于是常常拔它起来,牵连不断地拔起来,n" +
  44. "也曾因此弄坏了泥墙,却从来没有见过有一块根像人样。如果不怕刺,还可以摘到覆盆子,像小珊瑚珠攒成的小球,又酸又甜,色味都比桑葚要好得远。";
  45. }
  46. public void sayIt() throws Exception {
  47. int ttsTextLength = tts_text.length();
  48. String[] longTexts;
  49. int i = 0;
  50. boolean isHead = false; //标识是否是第一个头文件
  51. String tts_part_text;
  52. File file = new File(fileName+".pcm");
  53. if (!file.exists()) {
  54. try {
  55. file.createNewFile();
  56. } catch (Exception e) {
  57. e.printStackTrace();
  58. }
  59. }
  60. FileOutputStream outputStream = new FileOutputStream(file, true);
  61. longTexts = processLongText(tts_text);
  62. //处理文本,文本长度以50为限,截取为多个文件.
  63. while (ttsTextLength > 0) {
  64. tts_part_text = "";
  65. if (ttsTextLength > 50) {
  66. if (i == 0) {
  67. isHead = true;
  68. } else {
  69. isHead = false;
  70. }
  71. for (; i < longTexts.length; i++) {
  72. tts_part_text = tts_part_text + longTexts[i];
  73. if (i < longTexts.length - 1 && tts_part_text.length() + longTexts[i + 1].length() >= 50) {
  74. i = i + 1;
  75. break;
  76. }
  77. }
  78. } else {
  79. if (i == 0) {
  80. isHead = true;
  81. }
  82. for (; i < longTexts.length; i++) {
  83. tts_part_text = tts_part_text + longTexts[i];
  84. }
  85. }
  86. NlsRequest req = new NlsRequest();
  87. req.setApp_key("nls-service");
  88. req.setTts_req(tts_part_text, "16000");
  89. req.setTtsEncodeType("wav");
  90. req.setTtsVoice("xiaoyun");//男声:xiaogang
  91. req.setTtsVolume(50);
  92. req.setTtsBackgroundMusic(1, 0);
  93. req.authorize(auth_Id, auth_Secret);
  94. NlsFuture future = client.createNlsFuture(req, this);
  95. int total_len = 0;
  96. byte[] data;
  97. while ((data = future.read()) != null) {
  98. if (data.length == 8044 ) {
  99. // 去掉wav头,同时将多条wav转成一条pcm
  100. logger.debug("data length:{} , and head is:{}", (data.length - 44), isHead ? "true" : "false");
  101. outputStream.write(data, 44, data.length - 44);
  102. } else {
  103. outputStream.write(data, 0, data.length);
  104. }
  105. total_len += data.length;
  106. }
  107. logger.info("tts audio file size is :" + total_len);
  108. future.await(10000);
  109. ttsTextLength = ttsTextLength - tts_part_text.length();
  110. }
  111. outputStream.close();
  112. //将pcm转为wav,可以直接播放. 格式为:16kHz采样率,16bit,单声道
  113. PcmToWav.copyWaveFile(fileName+".pcm",fileName+".wav");
  114. logger.debug("close the wav file!");
  115. }
  116. @Override
  117. public void onMessageReceived(NlsEvent e) {
  118. NlsResponse response = e.getResponse();
  119. String result = "";
  120. if (response.getDs_ret() != null) {
  121. result = "get ds result: " + response.getDs_ret();
  122. }
  123. if (response.getAsr_ret() != null) {
  124. result += "nget asr result: " + response.getAsr_ret();
  125. }
  126. if (response.getTts_ret() != null) {
  127. result += "nget tts result: " + response.getTts_ret();
  128. }
  129. if (response.getGds_ret() != null) {
  130. result += "nget gds result: " + response.getGds_ret();
  131. }
  132. if (!result.isEmpty()) {
  133. logger.info(result);
  134. } else if (response.jsonResults != null) {
  135. logger.info(response.jsonResults.toString());
  136. } else {
  137. logger.info("get an acknowledge package from server.");
  138. }
  139. }
  140. @Override
  141. public void onOperationFailed(NlsEvent e) {
  142. logger.error("Error message is: {}, Error code is: {}", e.getErrorMessage(), Integer.valueOf(e.getResponse().getStatus_code()));
  143. }
  144. //切分长文本
  145. public static String[] processLongText(String text) {
  146. text = text.replaceAll("、", "、|");
  147. text = text.replaceAll(",", ",|");
  148. text = text.replaceAll("。", "。|");
  149. text = text.replaceAll(";", ";|");
  150. text = text.replaceAll("?", "?|");
  151. text = text.replaceAll("!", "!|");
  152. text = text.replaceAll(",", ",|");
  153. text = text.replaceAll(";", ";|");
  154. text = text.replaceAll("\?", "?|");
  155. text = text.replaceAll("!", "!|");
  156. String[] texts = text.split("\|");
  157. return texts;
  158. }
  159. @Override
  160. public void onChannelClosed(NlsEvent e) {
  161. logger.info("on websocket closed.");
  162. }
  163. /**
  164. * @param args
  165. */
  166. public static void main(String[] args) throws Exception {
  167. LongTtsDemo lun = new LongTtsDemo();
  168. if (args.length < 4) {
  169. logger.info("NlsDemo <app-key> <Id> <Secret>");
  170. System.exit(-1);
  171. }
  172. lun.appKey = args[0];
  173. lun.auth_Id = args[1];
  174. lun.auth_Secret = args[2];
  175. lun.start();
  176. lun.sayIt();
  177. lun.shutDown();
  178. }
  179. }
pcm 转 wav 工具类:
  1. package com.alibaba.idst.nls.utils;
  2. import java.io.FileInputStream;
  3. import java.io.FileNotFoundException;
  4. import java.io.FileOutputStream;
  5. import java.io.IOException;
  /**
   * Utility that copies raw PCM audio into a WAV file by prepending a 44-byte
   * RIFF/WAVE header. The header is written for 16 kHz, 16-bit, single-channel audio.
   *
   * Created by songsong.sss on 2016/12/12.
   */
  public class PcmToWav {
      /** Sample rate written to the header, in Hz. */
      private static final int frequency = 16000;
      /** Bits per sample written to the header. */
      private static final int RECORDER_BPP = 16;
      /** Size of the buffer used while copying audio data. */
      private static final int recBufSize = 640;
      /** Size of the RIFF/WAVE header in bytes. */
      private static final int HEADER_SIZE = 44;

      /**
       * Writes {@code outFilename} as a WAV file containing the raw PCM bytes of
       * {@code inFilename}.
       *
       * <p>I/O failures are printed to standard error and not rethrown.
       *
       * @param inFilename path of the raw PCM input file
       * @param outFilename path of the WAV file to create or overwrite
       */
      public static void copyWaveFile(String inFilename, String outFilename) {
          int channels = 1;
          long longSampleRate = frequency;
          long byteRate = RECORDER_BPP * frequency * channels / 8;
          byte[] data = new byte[recBufSize];
          try {
              FileInputStream in = new FileInputStream(inFilename);
              try {
                  FileOutputStream out = new FileOutputStream(outFilename);
                  try {
                      long totalAudioLen = in.getChannel().size();
                      // RIFF chunk size = file size minus the 8 bytes of "RIFF" + size field.
                      long totalDataLen = totalAudioLen + 36;
                      WriteWaveFileHeader(out, totalAudioLen, totalDataLen,
                              longSampleRate, channels, byteRate);
                      int read;
                      while ((read = in.read(data)) != -1) {
                          // Write only the bytes actually read; the last read is usually
                          // shorter than the buffer.
                          out.write(data, 0, read);
                      }
                  } finally {
                      out.close();
                  }
              } finally {
                  in.close();
              }
          } catch (FileNotFoundException e) {
              e.printStackTrace();
          } catch (IOException e) {
              e.printStackTrace();
          }
      }

      /**
       * Writes the 44-byte RIFF/WAVE header. All multi-byte fields are little-endian.
       *
       * @param out stream positioned at the start of the WAV file
       * @param totalAudioLen number of audio data bytes that follow the header
       * @param totalDataLen value of the RIFF chunk size field
       * @param longSampleRate sample rate in Hz
       * @param channels number of channels
       * @param byteRate bytes of audio per second
       * @throws IOException if writing to {@code out} fails
       */
      private static void WriteWaveFileHeader(
              FileOutputStream out, long totalAudioLen,
              long totalDataLen, long longSampleRate, int channels,
              long byteRate) throws IOException {
          byte[] header = new byte[HEADER_SIZE];
          header[0] = 'R'; // RIFF/WAVE header
          header[1] = 'I';
          header[2] = 'F';
          header[3] = 'F';
          header[4] = (byte) (totalDataLen & 0xff);
          header[5] = (byte) ((totalDataLen >> 8) & 0xff);
          header[6] = (byte) ((totalDataLen >> 16) & 0xff);
          header[7] = (byte) ((totalDataLen >> 24) & 0xff);
          header[8] = 'W';
          header[9] = 'A';
          header[10] = 'V';
          header[11] = 'E';
          header[12] = 'f'; // 'fmt ' chunk
          header[13] = 'm';
          header[14] = 't';
          header[15] = ' ';
          header[16] = 16; // 4 bytes: size of 'fmt ' chunk
          header[17] = 0;
          header[18] = 0;
          header[19] = 0;
          header[20] = 1; // format = 1
          header[21] = 0;
          header[22] = (byte) channels;
          header[23] = 0;
          header[24] = (byte) (longSampleRate & 0xff);
          header[25] = (byte) ((longSampleRate >> 8) & 0xff);
          header[26] = (byte) ((longSampleRate >> 16) & 0xff);
          header[27] = (byte) ((longSampleRate >> 24) & 0xff);
          header[28] = (byte) (byteRate & 0xff);
          header[29] = (byte) ((byteRate >> 8) & 0xff);
          header[30] = (byte) ((byteRate >> 16) & 0xff);
          header[31] = (byte) ((byteRate >> 24) & 0xff);
          header[32] = (byte) (channels * RECORDER_BPP / 8); // block align
          header[33] = 0;
          header[34] = RECORDER_BPP; // bits per sample
          header[35] = 0;
          header[36] = 'd';
          header[37] = 'a';
          header[38] = 't';
          header[39] = 'a';
          header[40] = (byte) (totalAudioLen & 0xff);
          header[41] = (byte) ((totalAudioLen >> 8) & 0xff);
          header[42] = (byte) ((totalAudioLen >> 16) & 0xff);
          header[43] = (byte) ((totalAudioLen >> 24) & 0xff);
          out.write(header, 0, HEADER_SIZE);
      }
  }

最后更新:2016-12-13 11:16:18

  上一篇:go 批量合成工具__语音合成(TTS)_智能语音交互-阿里云
  下一篇:go 协议描述语言__语义表示协议_自然语言理解(NLU)_智能语音交互-阿里云