-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add various functions for kaldi-hmm-gmm (#46)
- Loading branch information
1 parent
31663a5
commit 45a40c1
Showing
42 changed files
with
4,000 additions
and
65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,43 +1,44 @@ | ||
# Copyright 2020 Fangjun Kuang ([email protected]) | ||
# See ../LICENSE for clarification regarding multiple authors | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
function(download_googltest) | ||
include(FetchContent) | ||
|
||
set(googletest_URL "https://github.com/google/googletest/archive/release-1.10.0.tar.gz") | ||
set(googletest_HASH "SHA256=9dc9157a9a1551ec7a7e43daea9a694a0bb5fb8bec81235d8a1e6ef64c716dcb") | ||
set(googletest_URL "https://github.com/google/googletest/archive/refs/tags/v1.13.0.tar.gz") | ||
set(googletest_URL2 "https://huggingface.co/csukuangfj/sherpa-cmake-deps/resolve/main/googletest-1.13.0.tar.gz") | ||
set(googletest_HASH "SHA256=ad7fdba11ea011c1d925b3289cf4af2c66a352e18d4c7264392fead75e919363") | ||
|
||
# If you don't have access to the Internet, | ||
# please pre-download googletest | ||
set(possible_file_locations | ||
$ENV{HOME}/Downloads/googletest-1.13.0.tar.gz | ||
${PROJECT_SOURCE_DIR}/googletest-1.13.0.tar.gz | ||
${PROJECT_BINARY_DIR}/googletest-1.13.0.tar.gz | ||
/tmp/googletest-1.13.0.tar.gz | ||
/star-fj/fangjun/download/github/googletest-1.13.0.tar.gz | ||
) | ||
|
||
# If you don't have access to the Internet, please download the file to your | ||
# local drive and replace with the line below (you need to change it accordingly. | ||
# I am placing it in /mypath/release-1.10.0.tar.gz, but you can place it | ||
# anywhere you like) | ||
# set(googletest_URL "/mypath/release-1.10.0.tar.gz") | ||
foreach(f IN LISTS possible_file_locations) | ||
if(EXISTS ${f}) | ||
set(googletest_URL "${f}") | ||
file(TO_CMAKE_PATH "${googletest_URL}" googletest_URL) | ||
set(googletest_URL2) | ||
break() | ||
endif() | ||
endforeach() | ||
|
||
set(BUILD_GMOCK ON CACHE BOOL "" FORCE) | ||
set(INSTALL_GTEST OFF CACHE BOOL "" FORCE) | ||
set(gtest_disable_pthreads ON CACHE BOOL "" FORCE) | ||
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) | ||
|
||
FetchContent_Declare(googletest | ||
URL ${googletest_URL} | ||
URL | ||
${googletest_URL} | ||
${googletest_URL2} | ||
URL_HASH ${googletest_HASH} | ||
) | ||
|
||
FetchContent_GetProperties(googletest) | ||
if(NOT googletest_POPULATED) | ||
message(STATUS "Downloading googletest ${googletest_URL}") | ||
message(STATUS "Downloading googletest from ${googletest_URL}") | ||
FetchContent_Populate(googletest) | ||
endif() | ||
message(STATUS "googletest is downloaded to ${googletest_SOURCE_DIR}") | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,36 +1,33 @@ | ||
# Copyright 2020 Fangjun Kuang ([email protected]) | ||
# See ../LICENSE for clarification regarding multiple authors | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
function(download_pybind11) | ||
include(FetchContent) | ||
|
||
set(pybind11_URL "https://github.com/pybind/pybind11/archive/refs/tags/v2.10.2.tar.gz") | ||
set(pybind11_URL2 "https://huggingface.co/csukuangfj/sherpa-cmake-deps/resolve/main/pybind11-2.10.2.tar.gz") | ||
set(pybind11_HASH "SHA256=93bd1e625e43e03028a3ea7389bba5d3f9f2596abc074b068e70f4ef9b1314ae") | ||
|
||
# If you don't have access to the Internet, please download it to your | ||
# local drive and modify the following line according to your needs. | ||
if(EXISTS "/star-fj/fangjun/download/github/pybind11-2.10.2.tar.gz") | ||
set(pybind11_URL "file:///star-fj/fangjun/download/github/pybind11-2.10.2.tar.gz") | ||
elseif(EXISTS "/Users/fangjun/Downloads/pybind11-2.10.2.tar.gz") | ||
set(pybind11_URL "file:///Users/fangjun/Downloads/pybind11-2.10.2.tar.gz") | ||
elseif(EXISTS "/tmp/pybind11-2.10.2.tar.gz") | ||
set(pybind11_URL "file:///tmp/pybind11-2.10.2.tar.gz") | ||
endif() | ||
# If you don't have access to the Internet, | ||
# please pre-download pybind11 | ||
set(possible_file_locations | ||
$ENV{HOME}/Downloads/pybind11-2.10.2.tar.gz | ||
${PROJECT_SOURCE_DIR}/pybind11-2.10.2.tar.gz | ||
${PROJECT_BINARY_DIR}/pybind11-2.10.2.tar.gz | ||
/tmp/pybind11-2.10.2.tar.gz | ||
/star-fj/fangjun/download/github/pybind11-2.10.2.tar.gz | ||
) | ||
|
||
foreach(f IN LISTS possible_file_locations) | ||
if(EXISTS ${f}) | ||
set(pybind11_URL "${f}") | ||
file(TO_CMAKE_PATH "${pybind11_URL}" pybind11_URL) | ||
set(pybind11_URL2) | ||
break() | ||
endif() | ||
endforeach() | ||
|
||
FetchContent_Declare(pybind11 | ||
URL ${pybind11_URL} | ||
URL | ||
${pybind11_URL} | ||
${pybind11_URL2} | ||
URL_HASH ${pybind11_HASH} | ||
) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,36 @@ | ||
|
||
# Drive every example sub-directory: `make` builds each example and
# `make clean` removes what each of them generated.
#
# Each sub-directory is listed exactly once, in alphabetical order, so no
# example is run (or cleaned) twice.
.PHONY: all clean

all:
	$(MAKE) -C add_self_loops
	$(MAKE) -C arcsort
	$(MAKE) -C compose
	$(MAKE) -C connect
	$(MAKE) -C determinize
	$(MAKE) -C determinizestar
	$(MAKE) -C draw
	$(MAKE) -C equal_align
	$(MAKE) -C fstcompile
	$(MAKE) -C get_linear_symbol_sequence
	$(MAKE) -C invert
	$(MAKE) -C make_linear_acceptor
	$(MAKE) -C minimize
	$(MAKE) -C minimize_encoded
	$(MAKE) -C reverse
	$(MAKE) -C rmepsilon

clean:
	$(MAKE) -C add_self_loops clean
	$(MAKE) -C arcsort clean
	$(MAKE) -C compose clean
	$(MAKE) -C connect clean
	$(MAKE) -C determinize clean
	$(MAKE) -C determinizestar clean
	$(MAKE) -C draw clean
	$(MAKE) -C equal_align clean
	$(MAKE) -C fstcompile clean
	$(MAKE) -C get_linear_symbol_sequence clean
	$(MAKE) -C invert clean
	$(MAKE) -C make_linear_acceptor clean
	$(MAKE) -C minimize clean
	$(MAKE) -C minimize_encoded clean
	$(MAKE) -C reverse clean
	$(MAKE) -C rmepsilon clean
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
|
||
# `all` and `clean` name actions, not files; declaring them phony keeps the
# rules working even if a file called "all" or "clean" appears here.
.PHONY: all clean

# Run the example script in this directory.
all:
	python3 ./ex.py

# Delete the *.svg and *.gv files left in this directory.
clean:
	$(RM) *.svg *.gv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import graphviz | ||
|
||
import kaldifst | ||
|
||
# Textual description of the example FST; it is handed to kaldifst.compile()
# below together with the two symbol tables.
fst_text = """
0 1 a p
1
1 2 b q
2 3 c r
3 4 f t
3 0 d s
5 0 f t
"""


def _make_symbol_table(table_name, symbols):
    """Return a SymbolTable named `table_name` whose ids start at 1.

    `symbols` are registered in order, so the first entry gets id 1, the
    second id 2, and so on.
    """
    table = kaldifst.SymbolTable(name=table_name)
    for sym_id, sym in enumerate(symbols, start=1):
        table.add_symbol(sym, sym_id)
    return table


def _render_svg(machine, svg_name):
    """Draw `machine` as a transducer in portrait mode and render it to `svg_name`."""
    dot_text = kaldifst.draw(machine, acceptor=False, portrait=True)
    graphviz.Source(dot_text).render(outfile=svg_name)


# Both tables place "#0" and "#1" at ids 6 and 7; those are the ids passed to
# add_self_loops() below.
input_table = _make_symbol_table("sym1", ["a", "b", "c", "d", "f", "#0", "#1"])
output_table = _make_symbol_table("sym2", ["p", "q", "r", "s", "t", "#0", "#1"])

fst = kaldifst.compile(
    s=fst_text, acceptor=False, isymbols=input_table, osymbols=output_table
)

# Attach the tables to the compiled FST before drawing it.
fst.input_symbols = input_table
fst.output_symbols = output_table

# Picture of the FST before self-loops are added.
_render_svg(fst, "fst.svg")

# add_self_loops() is applied to `fst` itself; the same object is drawn again
# afterwards.
kaldifst.add_self_loops(fst, isyms=[6, 7], osyms=[6, 7])

# Picture of the FST after self-loops were added.
_render_svg(fst, "fst-add-self-loops.svg")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
|
||
# `all` and `clean` name actions, not files; declaring them phony keeps the
# rules working even if a file called "all" or "clean" appears here.
.PHONY: all clean

# Run the example script in this directory.
all:
	python3 ./ex.py

# Delete the *.svg and *.gv files left in this directory.
clean:
	$(RM) *.svg *.gv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import graphviz | ||
|
||
import kaldifst | ||
|
||
# Textual description of the weighted example FST; it is handed to
# kaldifst.compile() below together with the two symbol tables.
s1 = """
0 0 e E 0.3
0 1 a A 1
0 1 b B 2.5
1 2 <eps> <eps> 0.3
1 2 <eps> <eps> 0.4
1 1 f F 0.03
2 2 g G 0.8
2 3 h H 0.12
3
"""

# Input-side symbols: "<eps>" is id 0, lowercase letters "a".."h" are ids 1-8.
sym1 = kaldifst.SymbolTable(name="sym1")
sym1.add_symbol("<eps>", 0)
sym1.add_symbol("a", 1)
sym1.add_symbol("b", 2)
sym1.add_symbol("c", 3)
sym1.add_symbol("d", 4)
sym1.add_symbol("e", 5)
sym1.add_symbol("f", 6)
sym1.add_symbol("g", 7)
sym1.add_symbol("h", 8)

# Output-side symbols: same ids as sym1, but with uppercase letters.
sym2 = kaldifst.SymbolTable(name="sym2")
sym2.add_symbol("<eps>", 0)
sym2.add_symbol("A", 1)
sym2.add_symbol("B", 2)
sym2.add_symbol("C", 3)
sym2.add_symbol("D", 4)
sym2.add_symbol("E", 5)
sym2.add_symbol("F", 6)
sym2.add_symbol("G", 7)
sym2.add_symbol("H", 8)

fst = kaldifst.compile(s=s1, acceptor=False, isymbols=sym1, osymbols=sym2)

# Attach the tables to the compiled FST before drawing it.
fst.input_symbols = sym1
fst.output_symbols = sym2

# Picture of the source FST.
fst_dot = kaldifst.draw(fst, acceptor=False, portrait=True)
fst_source = graphviz.Source(fst_dot)
fst_source.render(outfile="input.svg")

# First call: equal_align() returns a (succeeded, fst) pair; here with
# length=4 and rand_seed=3.
succeeded, first = kaldifst.equal_align(
    ifst=fst, length=4, rand_seed=3, num_retries=10
)
assert succeeded is True
# Label the result with the same tables as the source FST: sym1 on the input
# side and sym2 on the output side. (The output side previously received sym1,
# unlike the `second` result below, so its output labels were drawn with the
# lowercase input symbols.)
first.input_symbols = sym1
first.output_symbols = sym2

first_dot = kaldifst.draw(first, acceptor=False, portrait=True)
first_source = graphviz.Source(first_dot)
first_source.render(outfile="first.svg")

# Second call: same source FST, with length=5 and rand_seed=10.
succeeded, second = kaldifst.equal_align(
    ifst=fst, length=5, rand_seed=10, num_retries=10
)
assert succeeded is True
second.input_symbols = sym1
second.output_symbols = sym2

second_dot = kaldifst.draw(second, acceptor=False, portrait=True)
second_source = graphviz.Source(second_dot)
second_source.render(outfile="second.svg")
6 changes: 6 additions & 0 deletions
6
docs/source/python_api/code/get_linear_symbol_sequence/Makefile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
|
||
# `all` and `clean` name actions, not files; declaring them phony keeps the
# rules working even if a file called "all" or "clean" appears here.
.PHONY: all clean

# Run the example script in this directory.
all:
	python3 ./ex.py

# Delete the *.svg and *.gv files left in this directory.
clean:
	$(RM) *.svg *.gv
Oops, something went wrong.