Skip to content

Commit

Permalink
support lexicon
Browse files Browse the repository at this point in the history
  • Loading branch information
wxxxcxx committed Nov 17, 2022
1 parent cff9f75 commit ee207f0
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 13 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

## 重要更改

**2022-09-10:修改 docker 仓库地址,后面构建的 docker 镜像会迁移到 wxxxcxx/ms-ra-forwarder(原仓库旧版本镜像依然有效)。**
**2022-11-18:添加词典文件支持,词典文件格式参考 <https://github.com/wxxxcxx/azure-tts-lexicon-cn/blob/main/lexicon.xml>。**

2022-09-10:修改 docker 仓库地址,后面构建的 docker 镜像会迁移到 wxxxcxx/ms-ra-forwarder(原仓库旧版本镜像依然有效)。

2022-09-01:Azure TTS API 好像又改了,旧版用户可能会无法正常使用,请更新到最新版。

Expand Down
2 changes: 2 additions & 0 deletions api/legado.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ module.exports = async (request: Request, response: Response) => {
let styleDegree = request.query['styleDegree']
let voiceFormat =
request.query['voiceFormat'] ?? 'audio-16khz-32kbitrate-mono-mp3'
let lexicon = request.query['lexicon'] ?? ''
let token = request.query['token'] ?? ''

if (Array.isArray(voiceFormat)) {
Expand All @@ -34,6 +35,7 @@ module.exports = async (request: Request, response: Response) => {
let ssml =
`<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="en-US">` +
`<voice name="${voiceName}">` +
(lexicon==='' ? '' : `<lexicon uri="${lexicon}"/>`)+
(styleName
? `<mstts:express-as style="${styleName}" styledegree="${styleDegree}">`
: ``) +
Expand Down
56 changes: 44 additions & 12 deletions public/azure.html
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
<div class="col">
<div class="alert alert-warning" role="alert">
Azure 版本。此版本使用了<a href="https://azure.microsoft.com/zh-cn/services/cognitive-services/text-to-speech/"
target="_blank">Azure 演示页面</a>的接口。有问题请提 <a href="https://github.com/wxxxcxx/ms-ra-forwarder/issues">issue</a>
target="_blank">Azure 演示页面</a>的接口。有问题请提 <a
href="https://github.com/wxxxcxx/ms-ra-forwarder/issues">issue</a>
</div>
</div>
</div>
Expand All @@ -43,7 +44,9 @@
<label for="voiceName" class="form-label">声音:</label>
<select name="voiceName" class="form-select" onchange="updateConfigName()">
</select>
<div class="form-text">声音列表加载可能有点慢,请稍等一下!</div>
<div class="form-text">
<p>声音列表加载可能有点慢,请稍等一下!</p>
</div>
</div>
</div>
<div class="row">
Expand All @@ -52,13 +55,17 @@
<select name="styleName" class="form-select">
<option value="general">general</option>
</select>
<div class="form-text">指定讲话风格。 说话风格特定于语音。</div>
<div class="form-text">
<p>指定讲话风格。 说话风格特定于语音。</p>
</div>
</div>
<div class="col">
<label for="styleDegree" class="form-label">风格强度:</label>
<input name="styleDegree" class="form-control" type="number" min="0.1" max="2.0" step="0.1" value="1.0">
<div class="form-text">指定说话风格的强度。 接受的值:0.01 到 2(含边界值)。 默认值为 1,表示预定义的风格强度。 最小单位为 0.01,表示略倾向于目标风格。 值为 2
表示是默认风格强度的两倍。</div>
<div class="form-text">
<p>指定说话风格的强度。 接受的值:0.01 到 2(含边界值)。 默认值为 1,表示预定义的风格强度。 最小单位为 0.01,表示略倾向于目标风格。 值为 2
表示是默认风格强度的两倍。</p>
</div>
</div>
</div>
<div class="row">
Expand Down Expand Up @@ -95,12 +102,24 @@
<p>如果出现 “Unsupported output format: XXX” 错误,表示不支持当前格式。</p>
</div>
</div>

<div class="row">
<div>
<label for="lexicon" class="form-label">词典文件:</label>
<input name="lexicon" class="form-control" type="text" value="">
<div class="form-text">
<p>词典文件的链接,用于改善多音字的发音,默认为空。(可以参考<a href="https://github.com/wxxxcxx/azure-tts-lexicon-cn"
target="_blank">wxxxcxx/azure-tts-lexicon-cn</a>)
</p>
</div>
</div>
</div>
<div class="row">
<div>
<label for="token" class="form-label">凭据(TOKEN):</label>
<input name="token" class="form-control" type="text" value="">
<div class="form-text">如果没有设置 TOKEN 环境变量请留空。</div>
<div class="form-text">
<p>如果没有设置 TOKEN 环境变量请留空。</p>
</div>
</div>
</div>

Expand Down Expand Up @@ -201,17 +220,27 @@ <h5 class="modal-title">阅读链接</h5>
}
}

/**
 * Build the SSML document submitted to the TTS endpoint.
 *
 * The template is assembled once; the optional <lexicon> element is placed
 * directly inside <voice>, before <mstts:express-as>, only when a lexicon
 * URL is supplied.
 *
 * @param {string} text        Text to synthesize. Inserted as-is (not escaped).
 * @param {string} voiceName   Value of the <voice name="..."> attribute.
 * @param {string} styleName   Value of the express-as "style" attribute.
 * @param {string|number} styleDegree Value of the express-as "styledegree" attribute.
 * @param {string} [lexicon=''] URL of a custom lexicon file. Empty, blank or
 *                             omitted means no <lexicon> element is emitted.
 * @returns {string} The SSML markup as a single-line string.
 */
function createSSML(text, voiceName, styleName, styleDegree, lexicon = '') {
  // Escape the characters that are not allowed raw inside a double-quoted XML
  // attribute; lexicon URLs frequently carry "&" in their query string.
  const escapeAttribute = (value) =>
    value
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;');

  // Defaulting and trimming keeps older four-argument callers (and a field
  // containing only whitespace) from producing <lexicon uri="undefined"/>.
  const lexiconUri = String(lexicon ?? '').trim();
  const lexiconTag =
    lexiconUri === '' ? '' : `<lexicon uri="${escapeAttribute(lexiconUri)}"/>`;

  const ssml =
    `<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="en-US">` +
    `<voice name="${voiceName}">` +
    lexiconTag +
    `<mstts:express-as style="${styleName}" styledegree="${styleDegree}">` +
    `<prosody rate="0%" pitch="0%">` +
    `${text}` +
    `</prosody>` +
    `</mstts:express-as>` +
    `</voice>` +
    `</speak>`;

  return ssml;
}
Expand All @@ -223,9 +252,10 @@ <h5 class="modal-title">阅读链接</h5>
let voiceFormat = document.getElementsByName('voiceFormat')[0].value;
let styleName = document.getElementsByName('styleName')[0].value;
let styleDegree = document.getElementsByName('styleDegree')[0].value;
let lexicon = document.getElementsByName('lexicon')[0].value;
let token = document.getElementsByName('token')[0].value;
let previewText = document.getElementsByName('previewText')[0].value;
let ssml = createSSML(previewText, voiceName, styleName, styleDegree)
let ssml = createSSML(previewText, voiceName, styleName, styleDegree, lexicon)
if (token) {
headers['Authorization'] = 'Bearer ' + token;
}
Expand Down Expand Up @@ -268,6 +298,7 @@ <h5 class="modal-title">阅读链接</h5>
let styleName = document.getElementsByName('styleName')[0].value;
let styleDegree = document.getElementsByName('styleDegree')[0].value;
let voiceFormat = document.getElementsByName('voiceFormat')[0].value;
let lexicon = document.getElementsByName('lexicon')[0].value;
let token = document.getElementsByName('token')[0].value;
let previewText = document.getElementsByName('previewText')[0].value;
let url = window.location.protocol + '//' + window.location.host + '/api/legado?api=' + encodeURI(window.location.protocol + '//' + window.location.host + '/api/azure')
Expand All @@ -276,6 +307,7 @@ <h5 class="modal-title">阅读链接</h5>
+ '&styleName=' + styleName
+ '&styleDegree=' + styleDegree
+ '&voiceFormat=' + voiceFormat
+ '&lexicon=' + encodeURI(lexicon)
+ '&token=' + token;
let svg = new QRCode(url).svg();
let modal = new bootstrap.Modal(document.getElementById('legadoUrlModal'))
Expand All @@ -286,4 +318,4 @@ <h5 class="modal-title">阅读链接</h5>
</script>
</body>

</html>
</html>

0 comments on commit ee207f0

Please sign in to comment.