閱讀184 返回首頁    go 人物


長文本合成Demo__語音合成(TTS)_智能語音交互-阿里雲

TTS 長文本合成Demo

長文本合成Demo用於滿足用戶將超過200字的文本合成為一條語音結果的需求。本Demo基於Java SDK實現,用戶需自行配置Java SDK的調用環境。

SDK 調用:
  1. package com.alibaba.idst.nls.demo;
  2. import com.alibaba.idst.nls.NlsClient;
  3. import com.alibaba.idst.nls.NlsFuture;
  4. import com.alibaba.idst.nls.event.NlsEvent;
  5. import com.alibaba.idst.nls.event.NlsListener;
  6. import com.alibaba.idst.nls.protocol.NlsRequest;
  7. import com.alibaba.idst.nls.protocol.NlsRequestASR;
  8. import com.alibaba.idst.nls.protocol.NlsRequestProto;
  9. import com.alibaba.idst.nls.protocol.NlsResponse;
  10. import com.alibaba.idst.nls.utils.PcmToWav;
  11. import org.slf4j.Logger;
  12. import org.slf4j.LoggerFactory;
  13. import java.io.File;
  14. import java.io.FileInputStream;
  15. import java.io.FileOutputStream;
  16. import java.util.Arrays;
  17. import java.util.UUID;
  18. /**
  19. * Created by songsong.sss on 16/12/12.
  20. */
  21. public class LongTtsDemo implements NlsListener {
  22. static Logger logger = LoggerFactory.getLogger(LongTtsDemo.class);
  23. private NlsClient client = new NlsClient();
  24. public String appKey = null;
  25. public String auth_Id = null;
  26. public String auth_Secret = null;
  27. public String tts_text ;
  28. private String fileName = UUID.randomUUID().toString();
  29. public LongTtsDemo() {
  30. }
  31. public void shutDown() {
  32. logger.info("close NLS client");
  33. client.close();
  34. logger.info("demo done");
  35. }
  36. public void start() {
  37. logger.info("init Nls client...");
  38. client.init();
  39. tts_text = "百草堂與三味書屋 魯迅 n" +
  40. "我家的後麵有一個很大的園,相傳叫作百草園。現在是早已並屋子一起賣給朱文公的子孫了,連那最末次的相見也已經隔了七八年,其中似乎確鑿隻有一些野草;但那時卻是我的樂園。n" +
  41. "不必說碧綠的菜畦,光滑的石井欄,高大的皂莢樹,紫紅的桑葚;也不必說鳴蟬在樹葉裏長吟,肥胖的黃蜂伏在菜花上,輕捷的叫天子(雲雀)忽然從草間直竄向雲霄裏去了。n"+
  42. "單是周圍的短短的泥牆根一帶,就有無限趣味。油蛉在這裏低唱,蟋蟀們在這裏彈琴。翻開斷磚來,有時會遇見蜈蚣;還有斑蝥,倘若用手指按住它的脊梁,便會啪的一聲,n"+
  43. "從後竅噴出一陣煙霧。何首烏藤和木蓮藤纏絡著,木蓮有蓮房一般的果實,何首烏有臃腫的根。有人說,何首烏根是有像人形的,吃了便可以成仙,我於是常常拔它起來,牽連不斷地拔起來,n" +
  44. "也曾因此弄壞了泥牆,卻從來沒有見過有一塊根像人樣。如果不怕刺,還可以摘到覆盆子,像小珊瑚珠攢成的小球,又酸又甜,色味都比桑葚要好得遠。";
  45. }
  46. public void sayIt() throws Exception {
  47. int ttsTextLength = tts_text.length();
  48. String[] longTexts;
  49. int i = 0;
  50. boolean isHead = false; //標識是否是第一個頭文件
  51. String tts_part_text;
  52. File file = new File(fileName+".pcm");
  53. if (!file.exists()) {
  54. try {
  55. file.createNewFile();
  56. } catch (Exception e) {
  57. e.printStackTrace();
  58. }
  59. }
  60. FileOutputStream outputStream = new FileOutputStream(file, true);
  61. longTexts = processLongText(tts_text);
  62. //處理文本,文本長度以50為限,截取為多個文件.
  63. while (ttsTextLength > 0) {
  64. tts_part_text = "";
  65. if (ttsTextLength > 50) {
  66. if (i == 0) {
  67. isHead = true;
  68. } else {
  69. isHead = false;
  70. }
  71. for (; i < longTexts.length; i++) {
  72. tts_part_text = tts_part_text + longTexts[i];
  73. if (i < longTexts.length - 1 && tts_part_text.length() + longTexts[i + 1].length() >= 50) {
  74. i = i + 1;
  75. break;
  76. }
  77. }
  78. } else {
  79. if (i == 0) {
  80. isHead = true;
  81. }
  82. for (; i < longTexts.length; i++) {
  83. tts_part_text = tts_part_text + longTexts[i];
  84. }
  85. }
  86. NlsRequest req = new NlsRequest();
  87. req.setApp_key("nls-service");
  88. req.setTts_req(tts_part_text, "16000");
  89. req.setTtsEncodeType("wav");
  90. req.setTtsVoice("xiaoyun");//男聲:xiaogang
  91. req.setTtsVolume(50);
  92. req.setTtsBackgroundMusic(1, 0);
  93. req.authorize(auth_Id, auth_Secret);
  94. NlsFuture future = client.createNlsFuture(req, this);
  95. int total_len = 0;
  96. byte[] data;
  97. while ((data = future.read()) != null) {
  98. if (data.length == 8044 ) {
  99. // 去掉wav頭,同時將多條wav轉成一條pcm
  100. logger.debug("data length:{} , and head is:{}", (data.length - 44), isHead ? "true" : "false");
  101. outputStream.write(data, 44, data.length - 44);
  102. } else {
  103. outputStream.write(data, 0, data.length);
  104. }
  105. total_len += data.length;
  106. }
  107. logger.info("tts audio file size is :" + total_len);
  108. future.await(10000);
  109. ttsTextLength = ttsTextLength - tts_part_text.length();
  110. }
  111. outputStream.close();
  112. //將pcm轉為wav,可以直接播放. 格式為:16kHz采樣率,16bit,單聲道
  113. PcmToWav.copyWaveFile(fileName+".pcm",fileName+".wav");
  114. logger.debug("close the wav file!");
  115. }
  116. @Override
  117. public void onMessageReceived(NlsEvent e) {
  118. NlsResponse response = e.getResponse();
  119. String result = "";
  120. if (response.getDs_ret() != null) {
  121. result = "get ds result: " + response.getDs_ret();
  122. }
  123. if (response.getAsr_ret() != null) {
  124. result += "nget asr result: " + response.getAsr_ret();
  125. }
  126. if (response.getTts_ret() != null) {
  127. result += "nget tts result: " + response.getTts_ret();
  128. }
  129. if (response.getGds_ret() != null) {
  130. result += "nget gds result: " + response.getGds_ret();
  131. }
  132. if (!result.isEmpty()) {
  133. logger.info(result);
  134. } else if (response.jsonResults != null) {
  135. logger.info(response.jsonResults.toString());
  136. } else {
  137. logger.info("get an acknowledge package from server.");
  138. }
  139. }
  140. @Override
  141. public void onOperationFailed(NlsEvent e) {
  142. logger.error("Error message is: {}, Error code is: {}", e.getErrorMessage(), Integer.valueOf(e.getResponse().getStatus_code()));
  143. }
  144. //切分長文本
  145. public static String[] processLongText(String text) {
  146. text = text.replaceAll("、", "、|");
  147. text = text.replaceAll(",", ",|");
  148. text = text.replaceAll("。", "。|");
  149. text = text.replaceAll(";", ";|");
  150. text = text.replaceAll("?", "?|");
  151. text = text.replaceAll("!", "!|");
  152. text = text.replaceAll(",", ",|");
  153. text = text.replaceAll(";", ";|");
  154. text = text.replaceAll("\?", "?|");
  155. text = text.replaceAll("!", "!|");
  156. String[] texts = text.split("\|");
  157. return texts;
  158. }
  159. @Override
  160. public void onChannelClosed(NlsEvent e) {
  161. logger.info("on websocket closed.");
  162. }
  163. /**
  164. * @param args
  165. */
  166. public static void main(String[] args) throws Exception {
  167. LongTtsDemo lun = new LongTtsDemo();
  168. if (args.length < 4) {
  169. logger.info("NlsDemo <app-key> <Id> <Secret>");
  170. System.exit(-1);
  171. }
  172. lun.appKey = args[0];
  173. lun.auth_Id = args[1];
  174. lun.auth_Secret = args[2];
  175. lun.start();
  176. lun.sayIt();
  177. lun.shutDown();
  178. }
  179. }
pcm 轉 wav 工具類:
  1. package com.alibaba.idst.nls.utils;
  2. import java.io.FileInputStream;
  3. import java.io.FileNotFoundException;
  4. import java.io.FileOutputStream;
  5. import java.io.IOException;
  /**
   * Utility that wraps a raw PCM file into a RIFF/WAVE file.
   *
   * <p>The header always describes the format fixed by this class's constants:
   * 16000 Hz sample rate, 16 bits per sample, one channel.
   *
   * <p>Created by songsong.sss on 2016/12/12.
   */
  public class PcmToWav {
      private static final int SAMPLE_RATE = 16000;
      private static final int RECORDER_BPP = 16;
      private static final int CHANNELS = 1;
      private static final int COPY_BUFFER_SIZE = 640;
      private static final int HEADER_LENGTH = 44;

      /**
       * Copies the PCM data of {@code inFilename} to {@code outFilename}, preceded
       * by a 44-byte WAV header.
       *
       * <p>I/O failures are not propagated; their stack trace is printed, as in the
       * original implementation. The input is opened first, so a missing input file
       * does not create the output file.
       *
       * @param inFilename path of the raw PCM input file
       * @param outFilename path of the WAV file to write (overwritten if present)
       */
      public static void copyWaveFile(String inFilename, String outFilename) {
          try (FileInputStream in = new FileInputStream(inFilename);
               FileOutputStream out = new FileOutputStream(outFilename)) {
              long totalAudioLen = in.getChannel().size();
              // RIFF chunk size = whole file minus the 8 bytes of "RIFF" + size field.
              long totalDataLen = totalAudioLen + HEADER_LENGTH - 8;
              long byteRate = (long) RECORDER_BPP * SAMPLE_RATE * CHANNELS / 8;
              writeWaveFileHeader(out, totalAudioLen, totalDataLen, SAMPLE_RATE, CHANNELS, byteRate);

              byte[] buffer = new byte[COPY_BUFFER_SIZE];
              int read;
              while ((read = in.read(buffer)) != -1) {
                  // Write only the bytes actually read; the last chunk is usually shorter than the buffer.
                  out.write(buffer, 0, read);
              }
          } catch (IOException e) {
              e.printStackTrace();
          }
      }

      /** Writes the 44-byte canonical PCM WAV header to {@code out}. */
      private static void writeWaveFileHeader(
              FileOutputStream out, long totalAudioLen,
              long totalDataLen, long longSampleRate, int channels,
              long byteRate) throws IOException {
          byte[] header = new byte[HEADER_LENGTH];
          header[0] = 'R'; // RIFF/WAVE header
          header[1] = 'I';
          header[2] = 'F';
          header[3] = 'F';
          putIntLittleEndian(header, 4, totalDataLen);
          header[8] = 'W';
          header[9] = 'A';
          header[10] = 'V';
          header[11] = 'E';
          header[12] = 'f'; // 'fmt ' chunk
          header[13] = 'm';
          header[14] = 't';
          header[15] = ' ';
          putIntLittleEndian(header, 16, 16); // size of 'fmt ' chunk
          header[20] = 1; // format = 1 (PCM)
          header[21] = 0;
          header[22] = (byte) channels;
          header[23] = 0;
          putIntLittleEndian(header, 24, longSampleRate);
          putIntLittleEndian(header, 28, byteRate);
          header[32] = (byte) (channels * RECORDER_BPP / 8); // block align
          header[33] = 0;
          header[34] = RECORDER_BPP; // bits per sample
          header[35] = 0;
          header[36] = 'd';
          header[37] = 'a';
          header[38] = 't';
          header[39] = 'a';
          putIntLittleEndian(header, 40, totalAudioLen);
          out.write(header, 0, HEADER_LENGTH);
      }

      /** Stores the low 32 bits of {@code value} at {@code offset}, least significant byte first. */
      private static void putIntLittleEndian(byte[] target, int offset, long value) {
          for (int i = 0; i < 4; i++) {
              target[offset + i] = (byte) ((value >> (8 * i)) & 0xff);
          }
      }
  }

最後更新:2016-12-13 11:16:18

  上一篇:go 批量合成工具__語音合成(TTS)_智能語音交互-阿里雲
  下一篇:go 協議描述語言__語義表示協議_自然語言理解(NLU)_智能語音交互-阿里雲