first commit

aethiopicuschan · Nov 2, 2023 · 9c77014 · 9c77014
commit 9c77014
Show file tree

Hide file tree

Showing 25 changed files with 1,389 additions and 0 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,41 @@
+name: CI
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - '**'
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Go
+        uses: actions/setup-go@v4
+        with:
+          go-version-file: "go.mod"
+
+      - run: go install golang.org/x/tools/cmd/goimports@latest
+      - run: go install github.com/go-critic/go-critic/cmd/gocritic@latest
+
+      - name: pre-commit
+        uses: pre-commit/[email protected]
+
+  test:
+    needs: lint
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Go
+        uses: actions/setup-go@v4
+        with:
+          go-version-file: "go.mod"
+
+      - run: go test ./...
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,3 @@
+.DS_Store
+model
+voicevox_core
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,8 @@
+repos:
+  - repo: https://github.com/aethiopicuschan/pre-commit-golang
+    rev: c17f835cf9f04b8b5ed1c1f7757cedc6728d8a21
+    hooks:
+      - id: go-fmt
+      - id: go-imports
+      - id: go-critic
+      - id: go-mod-tidy
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 aethiopicuschan
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,102 @@
+# nanoda
+
+[![License: MIT](https://img.shields.io/badge/License-MIT-brightgreen?style=flat-square)](/LICENSE)
+[![Go Reference](https://pkg.go.dev/badge/github.com/aethiopicuschan/nanoda.svg)](https://pkg.go.dev/github.com/aethiopicuschan/nanoda)
+[![CI](https://github.com/aethiopicuschan/nanoda/actions/workflows/ci.yml/badge.svg)](https://github.com/aethiopicuschan/nanoda/actions/workflows/ci.yml)
+
+nanodaは[VOICEVOX CORE](https://github.com/VOICEVOX/voicevox_core)の動的ライブラリをGolangから叩くためのライブラリです。`cgo`ではなく[ebitengine/purego](https://github.com/ebitengine/purego/)を利用しているため、簡単に使用することが可能です。
+
+## VOICEVOXについて
+
+サポートするVOICEVOX COREのバージョンは `0.15` としており、開発は `0.15.0-preview.13` を元にしています。
+
+nanoda自体は[MITライセンス](/LICENSE)ですが、利用に際してはVOICEVOXやOpenJTalkの利用規約に則る必要があることに注意してください。
+
+## 使い方
+
+```sh
+go get github.com/aethiopicuschan/nanoda@latest
+```
+
+もっとも簡単な例は以下のようになります。
+
+```go
+v, _ := nanoda.NewVoicevox("voicevox_core/libvoicevox_core.dylib", "voicevox_core/open_jtalk_dic_utf_8-1.11", "voicevox_core/model")
+s, _ := v.NewSynthesizer()
+s.LoadAllModels()
+wav, _ := s.Tts("ずんだもんなのだ！", 3)
+defer wav.Close()
+f, _ := os.Create("output.wav")
+defer f.Close()
+io.Copy(f, wav)
+```
+
+その他 `examples` ディレクトリにサンプルコードを置いていますので、ご活用ください。
+
+## 動作環境
+
+```
+GOARCH='arm64'
+GOOS='darwin'
+```
+
+でのみ確認しています。
+
+## 開発方針
+
+以下の理由からなるべくnanoda側で処理を受け持ったり抽象化したりして機能を提供することを目指しています。
+
+- 使いやすさの向上
+- メモリまわりの安全性要件の確保
+- VOICEVOXとアプリケーション間の密結合を避け、APIの変更等に強くする
+
+## テスト
+
+TODOです。ありません。
+
+## 対応状況
+
+以下は内部的に利用している関数のリストであり、必ずしも一致する形で公開されているわけではありません。
+
+- [x] voicevox_create_supported_devices_json
+- [x] voicevox_error_result_to_message
+- [x] voicevox_get_version
+- [x] voicevox_json_free
+- [ ] voicevox_make_default_initialize_options
+- [ ] voicevox_make_default_synthesis_options
+- [ ] voicevox_make_default_tts_options
+- [x] voicevox_open_jtalk_rc_delete
+- [x] voicevox_open_jtalk_rc_new
+- [x] voicevox_open_jtalk_rc_use_user_dict
+- [x] voicevox_synthesizer_create_accent_phrases
+- [x] voicevox_synthesizer_create_accent_phrases_from_kana
+- [x] voicevox_synthesizer_create_audio_query
+- [x] voicevox_synthesizer_create_audio_query_from_kana
+- [x] voicevox_synthesizer_create_metas_json
+- [x] voicevox_synthesizer_delete
+- [x] voicevox_synthesizer_is_gpu_mode
+- [x] voicevox_synthesizer_is_loaded_voice_model
+- [x] voicevox_synthesizer_load_voice_model
+- [x] voicevox_synthesizer_new_with_initialize
+- [x] voicevox_synthesizer_replace_mora_data
+- [x] voicevox_synthesizer_replace_mora_pitch
+- [x] voicevox_synthesizer_replace_phoneme_length
+- [x] voicevox_synthesizer_synthesis
+- [x] voicevox_synthesizer_tts
+- [x] voicevox_synthesizer_tts_from_kana
+- [x] voicevox_synthesizer_unload_voice_model
+- [x] voicevox_user_dict_add_word
+- [x] voicevox_user_dict_delete
+- [x] voicevox_user_dict_import
+- [x] voicevox_user_dict_load
+- [x] voicevox_user_dict_new
+- [x] voicevox_user_dict_remove_word
+- [x] voicevox_user_dict_save
+- [x] voicevox_user_dict_to_json
+- [x] voicevox_user_dict_update_word
+- [ ] voicevox_user_dict_word_make
+- [x] voicevox_voice_model_delete
+- [x] voicevox_voice_model_get_metas_json
+- [x] voicevox_voice_model_id
+- [x] voicevox_voice_model_new_from_path
+- [x] voicevox_wav_free
diff --git a/accent_phrases.go b/accent_phrases.go
@@ -0,0 +1,55 @@
+package nanoda
+
+import (
+	"encoding/json"
+	"unsafe"
+
+	"github.com/aethiopicuschan/nanoda/internal/strings"
+)
+
+// モーラ（子音＋母音）
+type Mora struct {
+	Text            string   `json:"text"`
+	Consonant       *string  `json:"consonant"`
+	ConsonantLength *float64 `json:"consonant_length"`
+	Vowel           string   `json:"vowel"`
+	VowelLength     float64  `json:"vowel_length"`
+	Pitch           float64  `json:"pitch"`
+}
+
+// アクセント句
+type AccentPhrase struct {
+	Moras           []Mora `json:"moras"`
+	Accent          int    `json:"accent"`
+	PauseMora       *Mora  `json:"pause_mora"`
+	IsInterrogative bool   `json:"is_interrogative"`
+}
+
+// アクセント句の配列を生成する
+func (s *Synthesizer) createAccentPhrases(text string, styleID StyleId, enableKana bool) (a []AccentPhrase, err error) {
+	var ptr *byte
+	var code ResultCode
+	if enableKana {
+		code = s.v.voicevoxSynthesizerCreateAccentPhrasesFromKana(s.synthesizer, text, styleID, uintptr(unsafe.Pointer(&ptr)))
+	} else {
+		code = s.v.voicevoxSynthesizerCreateAccentPhrases(s.synthesizer, text, styleID, uintptr(unsafe.Pointer(&ptr)))
+	}
+	if code != VOICEVOX_RESULT_OK {
+		err = s.v.newError(code)
+		return
+	}
+	defer s.v.voicevoxJsonFree(uintptr(unsafe.Pointer(ptr)))
+	j := strings.GoString(ptr)
+	err = json.Unmarshal([]byte(j), &a)
+	return
+}
+
+// アクセント句の配列を生成する
+func (s *Synthesizer) CreateAccentPhrases(text string, styleID StyleId) (a []AccentPhrase, err error) {
+	return s.createAccentPhrases(text, styleID, false)
+}
+
+// アクセント句の配列を生成する(AquesTalk風記法)
+func (s *Synthesizer) CreateAccentPhrasesFromKana(text string, styleID StyleId) (a []AccentPhrase, err error) {
+	return s.createAccentPhrases(text, styleID, true)
+}
diff --git a/audio_query.go b/audio_query.go
@@ -0,0 +1,51 @@
+package nanoda
+
+import (
+	"encoding/json"
+	"unsafe"
+
+	"github.com/aethiopicuschan/nanoda/internal/strings"
+)
+
+// 音声合成用のクエリ
+type AudioQuery struct {
+	AccentPhrases      []AccentPhrase `json:"accent_phrases"`
+	SpeedScale         float64        `json:"speed_scale"`
+	PitchScale         float64        `json:"pitch_scale"`
+	IntonationScale    float64        `json:"intonation_scale"`
+	VolumeScale        float64        `json:"volume_scale"`
+	PrePhonemeLength   float64        `json:"pre_phoneme_length"`
+	PostPhonemeLength  float64        `json:"post_phoneme_length"`
+	OutputSamplingRate int            `json:"output_sampling_rate"`
+	OutputStereo       bool           `json:"output_stereo"`
+	Kana               string         `json:"kana"`
+}
+
+// 音声合成用のクエリを作成する
+func (s *Synthesizer) createAudioQuery(text string, styleID StyleId, enableKana bool) (a AudioQuery, err error) {
+	var ptr *byte
+	var code ResultCode
+	if enableKana {
+		code = s.v.voicevoxSynthesizerCreateAudioQueryFromKana(s.synthesizer, text, styleID, uintptr(unsafe.Pointer(&ptr)))
+	} else {
+		code = s.v.voicevoxSynthesizerCreateAudioQuery(s.synthesizer, text, styleID, uintptr(unsafe.Pointer(&ptr)))
+	}
+	if code != VOICEVOX_RESULT_OK {
+		err = s.v.newError(code)
+		return
+	}
+	defer s.v.voicevoxJsonFree(uintptr(unsafe.Pointer(ptr)))
+	j := strings.GoString(ptr)
+	err = json.Unmarshal([]byte(j), &a)
+	return
+}
+
+// 音声合成用のクエリを生成する
+func (s *Synthesizer) CreateAudioQuery(text string, styleID StyleId) (a AudioQuery, err error) {
+	return s.createAudioQuery(text, styleID, false)
+}
+
+// 音声合成用のクエリを生成する(AquesTalk風記法)
+func (s *Synthesizer) CreateAudioQueryFromKana(text string, styleID StyleId) (a AudioQuery, err error) {
+	return s.createAudioQuery(text, styleID, true)
+}
diff --git a/error.go b/error.go
@@ -0,0 +1,19 @@
+package nanoda
+
+import "fmt"
+
+type Error struct {
+	Code ResultCode
+	Msg  string
+}
+
+func (e Error) Error() string {
+	return fmt.Sprintf("%d: %s", e.Code, e.Msg)
+}
+
+func (v *Voicevox) newError(code ResultCode) error {
+	return Error{
+		Code: code,
+		Msg:  v.voicevoxErrorResultToMessage(code),
+	}
+}
diff --git a/examples/audioquery/main.go b/examples/audioquery/main.go
@@ -0,0 +1,23 @@
+package main
+
+import (
+	"io"
+	"os"
+
+	"github.com/aethiopicuschan/nanoda"
+)
+
+func main() {
+	v, _ := nanoda.NewVoicevox("voicevox_core/libvoicevox_core.dylib", "voicevox_core/open_jtalk_dic_utf_8-1.11", "voicevox_core/model")
+	s, _ := v.NewSynthesizer()
+	s.LoadModelsFromStyleId(3)
+
+	aq, _ := s.CreateAudioQuery("2倍速ずんだもんなのだ！", 3)
+	aq.SpeedScale = 2.0
+
+	wav, _ := s.Synthesis(aq, 3)
+	defer wav.Close()
+	f, _ := os.Create("output.wav")
+	defer f.Close()
+	io.Copy(f, wav)
+}
diff --git a/examples/tts/main.go b/examples/tts/main.go
@@ -0,0 +1,19 @@
+package main
+
+import (
+	"io"
+	"os"
+
+	"github.com/aethiopicuschan/nanoda"
+)
+
+func main() {
+	v, _ := nanoda.NewVoicevox("voicevox_core/libvoicevox_core.dylib", "voicevox_core/open_jtalk_dic_utf_8-1.11", "voicevox_core/model")
+	s, _ := v.NewSynthesizer()
+	s.LoadModelsFromStyleId(3)
+	wav, _ := s.Tts("ずんだもんなのだ！", 3)
+	defer wav.Close()
+	f, _ := os.Create("output.wav")
+	defer f.Close()
+	io.Copy(f, wav)
+}
diff --git a/examples/userdict/main.go b/examples/userdict/main.go
@@ -0,0 +1,26 @@
+package main
+
+import (
+	"io"
+	"os"
+
+	"github.com/aethiopicuschan/nanoda"
+)
+
+func main() {
+	v, _ := nanoda.NewVoicevox("voicevox_core/libvoicevox_core.dylib", "voicevox_core/open_jtalk_dic_utf_8-1.11", "voicevox_core/model")
+
+	ud := v.NewUserDict()
+	w := nanoda.NewWord("開始めいッ", "ハジメイッ")
+	ud.AddWord(w)
+	ud.Use()
+
+	s, _ := v.NewSynthesizer()
+	s.LoadModelsFromStyleId(3)
+
+	wav, _ := s.Tts("開始めいッ！", 3)
+	defer wav.Close()
+	f, _ := os.Create("output.wav")
+	defer f.Close()
+	io.Copy(f, wav)
+}
diff --git a/go.mod b/go.mod
@@ -0,0 +1,10 @@
+module github.com/aethiopicuschan/nanoda
+
+go 1.21.3
+
+require (
+	github.com/ebitengine/purego v0.5.0
+	github.com/google/uuid v1.3.1
+)
+
+require golang.org/x/sys v0.13.0 // indirect
diff --git a/go.sum b/go.sum
@@ -0,0 +1,6 @@
+github.com/ebitengine/purego v0.5.0 h1:JrMGKfRIAM4/QVKaesIIT7m/UVjTj5GYhRSQYwfVdpo=
+github.com/ebitengine/purego v0.5.0/go.mod h1:ah1In8AOtksoNK6yk5z1HTJeUkC1Ez4Wk2idgGslMwQ=
+github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4=
+github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
+golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=