Skip to content

Commit

Permalink
Add various functions for kaldi-hmm-gmm (#46)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Aug 30, 2023
1 parent 31663a5 commit 45a40c1
Show file tree
Hide file tree
Showing 42 changed files with 4,000 additions and 65 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(kaldifst CXX)

# Remember to also change ./scripts/conda/kaldifst/meta.yaml
set(KALDIFST_VERSION "1.6")
set(KALDIFST_VERSION "1.7.0")

if(NOT CMAKE_BUILD_TYPE)
message(STATUS "No CMAKE_BUILD_TYPE given, default to Release")
Expand Down
49 changes: 25 additions & 24 deletions cmake/googletest.cmake
Original file line number Diff line number Diff line change
@@ -1,43 +1,44 @@
# Copyright 2020 Fangjun Kuang ([email protected])
# See ../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

function(download_googltest)
include(FetchContent)

set(googletest_URL "https://github.com/google/googletest/archive/release-1.10.0.tar.gz")
set(googletest_HASH "SHA256=9dc9157a9a1551ec7a7e43daea9a694a0bb5fb8bec81235d8a1e6ef64c716dcb")
set(googletest_URL "https://github.com/google/googletest/archive/refs/tags/v1.13.0.tar.gz")
set(googletest_URL2 "https://huggingface.co/csukuangfj/sherpa-cmake-deps/resolve/main/googletest-1.13.0.tar.gz")
set(googletest_HASH "SHA256=ad7fdba11ea011c1d925b3289cf4af2c66a352e18d4c7264392fead75e919363")

# If you don't have access to the Internet,
# please pre-download googletest
set(possible_file_locations
$ENV{HOME}/Downloads/googletest-1.13.0.tar.gz
${PROJECT_SOURCE_DIR}/googletest-1.13.0.tar.gz
${PROJECT_BINARY_DIR}/googletest-1.13.0.tar.gz
/tmp/googletest-1.13.0.tar.gz
/star-fj/fangjun/download/github/googletest-1.13.0.tar.gz
)

# If you don't have access to the Internet, please download the file to your
# local drive and replace with the line below (you need to change it accordingly.
# I am placing it in /mypath/release-1.10.0.tar.gz, but you can place it
# anywhere you like)
# set(googletest_URL "/mypath/release-1.10.0.tar.gz")
foreach(f IN LISTS possible_file_locations)
if(EXISTS ${f})
set(googletest_URL "${f}")
file(TO_CMAKE_PATH "${googletest_URL}" googletest_URL)
set(googletest_URL2)
break()
endif()
endforeach()

set(BUILD_GMOCK ON CACHE BOOL "" FORCE)
set(INSTALL_GTEST OFF CACHE BOOL "" FORCE)
set(gtest_disable_pthreads ON CACHE BOOL "" FORCE)
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)

FetchContent_Declare(googletest
URL ${googletest_URL}
URL
${googletest_URL}
${googletest_URL2}
URL_HASH ${googletest_HASH}
)

FetchContent_GetProperties(googletest)
if(NOT googletest_POPULATED)
message(STATUS "Downloading googletest ${googletest_URL}")
message(STATUS "Downloading googletest from ${googletest_URL}")
FetchContent_Populate(googletest)
endif()
message(STATUS "googletest is downloaded to ${googletest_SOURCE_DIR}")
Expand Down
33 changes: 25 additions & 8 deletions cmake/openfst.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,27 @@ function(download_openfst)
include(FetchContent)

set(openfst_URL "https://github.com/kkm000/openfst/archive/refs/tags/win/1.6.5.1.tar.gz")
set(openfst_URL2 "https://huggingface.co/csukuangfj/kaldi-hmm-gmm-cmake-deps/resolve/main/openfst-win-1.6.5.1.tar.gz")
set(openfst_HASH "SHA256=02c49b559c3976a536876063369efc0e41ab374be1035918036474343877046e")

# If you don't have access to the Internet, please download the file to your
# local drive and replace with the line below (you need to change it accordingly.
# I am placing it in /mypath/openfst-win-1.6.5.1.tar.gz, but you can place it
# anywhere you like)
# set(openfst_URL "/mypath/openfst-win-1.6.5.1.tar.gz")
# If you don't have access to the Internet,
# please pre-download it
set(possible_file_locations
$ENV{HOME}/Downloads/openfst-win-1.6.5.1.tar.gz
${PROJECT_SOURCE_DIR}/openfst-win-1.6.5.1.tar.gz
${PROJECT_BINARY_DIR}/openfst-win-1.6.5.1.tar.gz
/tmp/openfst-win-1.6.5.1.tar.gz
/star-fj/fangjun/download/github/openfst-win-1.6.5.1.tar.gz
)

foreach(f IN LISTS possible_file_locations)
if(EXISTS ${f})
set(openfst_URL "${f}")
file(TO_CMAKE_PATH "${openfst_URL}" openfst_URL)
set(openfst_URL2)
break()
endif()
endforeach()

set(HAVE_BIN OFF CACHE BOOL "" FORCE)
set(HAVE_SCRIPT ON CACHE BOOL "" FORCE)
Expand All @@ -29,11 +43,14 @@ function(download_openfst)

if(NOT WIN32)
FetchContent_Declare(openfst
URL ${openfst_URL}
URL
${openfst_URL}
${openfst_URL2}
URL_HASH ${openfst_HASH}
PATCH_COMMAND
sed -i.bak s/enable_testing\(\)//g "src/CMakeLists.txt" &&
sed -i.bak s/add_subdirectory\(test\)//g "src/CMakeLists.txt"
sed -i.bak s/add_subdirectory\(test\)//g "src/CMakeLists.txt" &&
sed -i.bak /message/d "src/script/CMakeLists.txt"
# sed -i.bak s/add_subdirectory\(script\)//g "src/CMakeLists.txt" &&
# sed -i.bak s/add_subdirectory\(extensions\)//g "src/CMakeLists.txt"
)
Expand All @@ -46,7 +63,7 @@ function(download_openfst)

FetchContent_GetProperties(openfst)
if(NOT openfst_POPULATED)
message(STATUS "Downloading openfst ${openfst_URL}")
message(STATUS "Downloading openfst from ${openfst_URL}")
FetchContent_Populate(openfst)
endif()
message(STATUS "openfst is downloaded to ${openfst_SOURCE_DIR}")
Expand Down
47 changes: 22 additions & 25 deletions cmake/pybind11.cmake
Original file line number Diff line number Diff line change
@@ -1,36 +1,33 @@
# Copyright 2020 Fangjun Kuang ([email protected])
# See ../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

function(download_pybind11)
include(FetchContent)

set(pybind11_URL "https://github.com/pybind/pybind11/archive/refs/tags/v2.10.2.tar.gz")
set(pybind11_URL2 "https://huggingface.co/csukuangfj/sherpa-cmake-deps/resolve/main/pybind11-2.10.2.tar.gz")
set(pybind11_HASH "SHA256=93bd1e625e43e03028a3ea7389bba5d3f9f2596abc074b068e70f4ef9b1314ae")

# If you don't have access to the Internet, please download it to your
# local drive and modify the following line according to your needs.
if(EXISTS "/star-fj/fangjun/download/github/pybind11-2.10.2.tar.gz")
set(pybind11_URL "file:///star-fj/fangjun/download/github/pybind11-2.10.2.tar.gz")
elseif(EXISTS "/Users/fangjun/Downloads/pybind11-2.10.2.tar.gz")
set(pybind11_URL "file:///Users/fangjun/Downloads/pybind11-2.10.2.tar.gz")
elseif(EXISTS "/tmp/pybind11-2.10.2.tar.gz")
set(pybind11_URL "file:///tmp/pybind11-2.10.2.tar.gz")
endif()
# If you don't have access to the Internet,
# please pre-download pybind11
set(possible_file_locations
$ENV{HOME}/Downloads/pybind11-2.10.2.tar.gz
${PROJECT_SOURCE_DIR}/pybind11-2.10.2.tar.gz
${PROJECT_BINARY_DIR}/pybind11-2.10.2.tar.gz
/tmp/pybind11-2.10.2.tar.gz
/star-fj/fangjun/download/github/pybind11-2.10.2.tar.gz
)

foreach(f IN LISTS possible_file_locations)
if(EXISTS ${f})
set(pybind11_URL "${f}")
file(TO_CMAKE_PATH "${pybind11_URL}" pybind11_URL)
set(pybind11_URL2)
break()
endif()
endforeach()

FetchContent_Declare(pybind11
URL ${pybind11_URL}
URL
${pybind11_URL}
${pybind11_URL2}
URL_HASH ${pybind11_HASH}
)

Expand Down
20 changes: 14 additions & 6 deletions docs/source/python_api/code/Makefile
Original file line number Diff line number Diff line change
@@ -1,28 +1,36 @@

# Build every example by invoking make inside each example subdirectory.
all:
$(MAKE) -C fstcompile
$(MAKE) -C add_self_loops
$(MAKE) -C arcsort
$(MAKE) -C compose
$(MAKE) -C connect
$(MAKE) -C determinize
$(MAKE) -C determinizestar
$(MAKE) -C draw
$(MAKE) -C connect
$(MAKE) -C equal_align
$(MAKE) -C fstcompile
$(MAKE) -C get_linear_symbol_sequence
$(MAKE) -C invert
$(MAKE) -C reverse
$(MAKE) -C make_linear_acceptor
$(MAKE) -C minimize
$(MAKE) -C minimize_encoded
$(MAKE) -C reverse
$(MAKE) -C rmepsilon

# Run the "clean" target of each example subdirectory.
clean:
$(MAKE) -C fstcompile clean
$(MAKE) -C add_self_loops clean
$(MAKE) -C arcsort clean
$(MAKE) -C compose clean
$(MAKE) -C connect clean
$(MAKE) -C determinize clean
$(MAKE) -C determinizestar clean
$(MAKE) -C draw clean
$(MAKE) -C connect clean
$(MAKE) -C equal_align clean
$(MAKE) -C fstcompile clean
$(MAKE) -C get_linear_symbol_sequence clean
$(MAKE) -C invert clean
$(MAKE) -C reverse clean
$(MAKE) -C make_linear_acceptor clean
$(MAKE) -C minimize clean
$(MAKE) -C minimize_encoded clean
$(MAKE) -C reverse clean
$(MAKE) -C rmepsilon clean
6 changes: 6 additions & 0 deletions docs/source/python_api/code/add_self_loops/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

# Neither target creates a file named after itself. Declaring them phony
# keeps them runnable even if a file called "all" or "clean" appears here.
.PHONY: all clean

# Run the example script.
all:
	python3 ./ex.py

# Delete the *.svg and *.gv files in this directory.
clean:
	$(RM) *.svg *.gv
47 changes: 47 additions & 0 deletions docs/source/python_api/code/add_self_loops/ex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import graphviz

import kaldifst

# Text description of the transducer handed to kaldifst.compile().
# NOTE(review): presumably OpenFst text format ("src dst ilabel olabel" per
# arc, a bare state id marks a final state) -- confirm against kaldifst docs.
s = """
0 1 a p
1
1 2 b q
2 3 c r
3 4 f t
3 0 d s
5 0 f t
"""


def _save_svg(f, outfile):
    """Draw ``f`` as a transducer in portrait mode and render it to ``outfile``."""
    dot = kaldifst.draw(f, acceptor=False, portrait=True)
    graphviz.Source(dot).render(outfile=outfile)


# Input symbol table: ids are assigned consecutively starting from 1.
sym1 = kaldifst.SymbolTable(name="sym1")
for idx, symbol in enumerate(["a", "b", "c", "d", "f", "#0", "#1"], start=1):
    sym1.add_symbol(symbol, idx)

# Output symbol table: same id layout as sym1, different regular symbols.
sym2 = kaldifst.SymbolTable(name="sym2")
for idx, symbol in enumerate(["p", "q", "r", "s", "t", "#0", "#1"], start=1):
    sym2.add_symbol(symbol, idx)

fst = kaldifst.compile(s=s, acceptor=False, isymbols=sym1, osymbols=sym2)

# Attach the tables to the FST before it is drawn.
fst.input_symbols = sym1
fst.output_symbols = sym2

# Snapshot of the FST before it is modified.
_save_svg(fst, "fst.svg")

# Ids 6 and 7 are "#0" and "#1" in both symbol tables.
kaldifst.add_self_loops(fst, isyms=[6, 7], osyms=[6, 7])

# Snapshot of the FST after add_self_loops() has been applied.
_save_svg(fst, "fst-add-self-loops.svg")
6 changes: 6 additions & 0 deletions docs/source/python_api/code/equal_align/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

# Neither target creates a file named after itself. Declaring them phony
# keeps them runnable even if a file called "all" or "clean" appears here.
.PHONY: all clean

# Run the example script.
all:
	python3 ./ex.py

# Delete the *.svg and *.gv files in this directory.
clean:
	$(RM) *.svg *.gv
68 changes: 68 additions & 0 deletions docs/source/python_api/code/equal_align/ex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import graphviz

import kaldifst

# Text description of the weighted input transducer.
# NOTE(review): presumably OpenFst text format ("src dst ilabel olabel weight"
# per arc, a bare state id marks a final state) -- confirm against kaldifst docs.
s1 = """
0 0 e E 0.3
0 1 a A 1
0 1 b B 2.5
1 2 <eps> <eps> 0.3
1 2 <eps> <eps> 0.4
1 1 f F 0.03
2 2 g G 0.8
2 3 h H 0.12
3
"""

# Input symbols are lower-case letters; ids start at 0 with <eps>.
sym1 = kaldifst.SymbolTable(name="sym1")
for idx, symbol in enumerate(["<eps>", "a", "b", "c", "d", "e", "f", "g", "h"]):
    sym1.add_symbol(symbol, idx)

# Output symbols are the matching upper-case letters with the same ids.
sym2 = kaldifst.SymbolTable(name="sym2")
for idx, symbol in enumerate(["<eps>", "A", "B", "C", "D", "E", "F", "G", "H"]):
    sym2.add_symbol(symbol, idx)

fst = kaldifst.compile(s=s1, acceptor=False, isymbols=sym1, osymbols=sym2)

# Attach the tables to the FST before it is drawn.
fst.input_symbols = sym1
fst.output_symbols = sym2

fst_dot = kaldifst.draw(fst, acceptor=False, portrait=True)
fst_source = graphviz.Source(fst_dot)
fst_source.render(outfile="input.svg")

# First call: length=4 with random seed 3.
succeeded, first = kaldifst.equal_align(
    ifst=fst, length=4, rand_seed=3, num_retries=10
)
assert succeeded is True
first.input_symbols = sym1
# Output labels belong to the output alphabet (sym2), exactly as for the input
# FST and for `second` below; the script previously assigned sym1 here, which
# drew the output side with input symbols.
first.output_symbols = sym2

first_dot = kaldifst.draw(first, acceptor=False, portrait=True)
first_source = graphviz.Source(first_dot)
first_source.render(outfile="first.svg")

# Second call: length=5 with a different random seed.
succeeded, second = kaldifst.equal_align(
    ifst=fst, length=5, rand_seed=10, num_retries=10
)
assert succeeded is True
second.input_symbols = sym1
second.output_symbols = sym2

second_dot = kaldifst.draw(second, acceptor=False, portrait=True)
second_source = graphviz.Source(second_dot)
second_source.render(outfile="second.svg")
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

# Neither target creates a file named after itself. Declaring them phony
# keeps them runnable even if a file called "all" or "clean" appears here.
.PHONY: all clean

# Run the example script.
all:
	python3 ./ex.py

# Delete the *.svg and *.gv files in this directory.
clean:
	$(RM) *.svg *.gv
Loading

0 comments on commit 45a40c1

Please sign in to comment.