-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
79c1b7c
commit f2ccc3b
Showing
15 changed files
with
511 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# you can change cmd.sh depending on what type of queue you are using. | ||
# If you have no queueing system and want to run on a local machine, you | ||
# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run | ||
# commands one by one: most recipes will exhaust the memory on your | ||
# machine). queue.pl works with GridEngine (qsub). slurm.pl works | ||
# with slurm. Different queues are configured differently, with different | ||
# queue names and different ways of specifying things like memory; | ||
# to account for these differences you can create and edit the file | ||
# conf/queue.conf to match your queue's configuration. Search for | ||
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, | ||
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. | ||
|
||
# Local-machine alternative (no queueing system): uncomment these three
# lines and comment out the queue-based settings further down.
#export train_cmd="run.pl --mem 4G"
#export cuda_cmd="run.pl --mem 4G --gpu 1"
#export decode_cmd="run.pl --mem 4G"

# JHU setup
train_cmd="queue.pl --mem 4G"
# Plain queue.pl variant of the GPU command, kept for reference:
#cuda_cmd="queue.pl --mem 4G --gpu 1 --config conf/gpu.conf"
cuda_cmd="queue-freegpu.pl --mem 4G --gpu 1 --config conf/gpu.conf"
decode_cmd="queue.pl --mem 4G"

# Export all three so that scripts launched from the recipe see them.
export train_cmd cuda_cmd decode_cmd
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# config for high-resolution MFCC features, intended for neural network training | ||
# Note: we keep all cepstra, so it has the same info as filterbank features, | ||
# but MFCC is more easily compressible (because less correlated) which is why | ||
# we prefer this method. | ||
--use-energy=false # use average of log energy, not energy. | ||
--num-mel-bins=40 # similar to Google's setup. | ||
--num-ceps=40 # there is no dimensionality reduction. | ||
--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so | ||
# there might be some information at the low end. | ||
--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../../tools/kaldi/egs/mini_librispeech/s5/local/data_prep.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../../tools/kaldi/egs/mini_librispeech/s5/local/download_and_untar.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../../tools/kaldi/egs/mini_librispeech/s5/local/download_lm.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../../tools/kaldi/egs/mini_librispeech/s5/local/format_lms.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../../tools/kaldi/egs/mini_librispeech/s5/local/prepare_dict.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../../tools/kaldi/egs/wsj/s5/steps/scoring/score_kaldi_wer.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Environment setup for this recipe. Meant to be sourced (". ./path.sh") from
# the recipe directory: MAIN_ROOT is resolved as two levels above $PWD.
# Note that the "exit 1" below terminates the *sourcing* script when the Kaldi
# common_path.sh file is missing.
MAIN_ROOT=$PWD/../..
KALDI_ROOT=$MAIN_ROOT/tools/kaldi

# BEGIN from kaldi path.sh
# Paths are quoted so that a checkout under a directory containing whitespace
# still works.
[ -f "$KALDI_ROOT/tools/env.sh" ] && . "$KALDI_ROOT/tools/env.sh"
export PATH="$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH"
[ ! -f "$KALDI_ROOT/tools/config/common_path.sh" ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
. "$KALDI_ROOT/tools/config/common_path.sh"
export LC_ALL=C
# END

export PATH="$MAIN_ROOT:$MAIN_ROOT/tools:$PATH"
# Only append the previous value (and its ':' separator) when it is non-empty:
# a trailing ':' would add an empty entry, which the dynamic loader interprets
# as the current working directory.
export LD_LIBRARY_PATH="$MAIN_ROOT/tools/pychain/openfst/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export PYTHONPATH="$MAIN_ROOT:$MAIN_ROOT/tools:$MAIN_ROOT/tools/pychain${PYTHONPATH:+:$PYTHONPATH}"
export PYTHONUNBUFFERED=1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
#           2020 Yiwen Shao
# Apache 2.0

# To be run from ..
# Flat start chain model training.

# This script initializes a trivial tree and transition model for flat-start
# chain training (or copies an existing tree/model from --treedir) and writes
# them to <tree-dir> as tree, 0.mdl, 0.trans_mdl and final.mdl (a symlink to
# 0.mdl), together with phones.txt, scale_opts and num_jobs.
# Note: this version stops there; it does not compile training graphs itself.

# Begin configuration section.
cmd=run.pl
nj=4
stage=0
shared_phones=true
treedir= # If specified, the tree and model will be copied from there
         # note that it may not be flat start anymore.
type=mono # Can be either mono or biphone -- either way
          # the resulting tree is full (i.e. it doesn't do any tying)
ci_silence=false # If true, silence phones will be treated as context independent

scale_opts="--transition-scale=0.0 --self-loop-scale=0.0"
tie=false # If true, gmm-init-biphone will do some tying when
          # creating the full biphone tree (it won't be full anymore).
          # Specifically, it will revert to monophone if the data
          # counts for a biphone are smaller than min_biphone_count.
          # If the monophone count is also smaller than min_monophone_count,
          # it will revert to a shared global phone. Note that this
          # only affects biphone models (i.e., type=biphone) which
          # use the special chain topology.
min_biphone_count=100
min_monophone_count=20
# End configuration section.

echo "$0 $@" # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# != 3 ]; then
  echo "Usage: steps/prepare_e2e.sh [options] <data-dir> <lang-dir> <tree-dir>"
  echo " e.g.: steps/prepare_e2e.sh data/train data/lang_chain exp/chain/e2e_tree"
  echo "main options (for others, see top of script file)"
  echo " --config <config-file> # config containing options"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo " --type <mono | biphone> # context dependency type"
  echo " --tie <true | false> # enable/disable count-based tying"
  exit 1;
fi

data=$1
lang=$2
dir=$3

if [[ "$type" != "mono" && "$type" != "biphone" ]]; then
  echo "'type' should be either mono or biphone."
  exit 1;
fi

mkdir -p "$dir/log" || exit 1;

echo $scale_opts > "$dir/scale_opts" # just for easier reference (it is in the logs too)
echo $nj > "$dir/num_jobs"
sdata=$data/split$nj;
# Re-split the data unless the split directory exists and is newer than feats.scp.
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh "$data" $nj || exit 1;

cp "$lang/phones.txt" "$dir" || exit 1;

# Fail loudly (instead of a bare exit 1) when the lang dir is incomplete.
if [ ! -f "$lang/phones/sets.int" ]; then
  echo "$0: expected file $lang/phones/sets.int to exist" >&2
  exit 1;
fi

# The option variables below are initialized explicitly so that a value
# inherited from the environment can never leak into the gmm-init-* command.
# They are expanded unquoted on purpose: an empty one must vanish entirely.
shared_phones_opt=
if $shared_phones; then
  shared_phones_opt="--shared-phones=$lang/phones/sets.int"
fi

ciphonelist=$(cat "$lang/phones/context_indep.csl") || exit 1;
ci_opt=
if $ci_silence; then
  ci_opt="--ci-phones=$ciphonelist"
fi

tie_opts=
if $tie && [[ "$type" = "biphone" ]]; then
  # Phone-level statistics used for count-based tying of the biphone tree.
  cat "$data/text" | steps/chain/e2e/text_to_phones.py --edge-silprob 0 \
      --between-silprob 0 \
      "$lang" | \
    cut -d' ' -f 2- | utils/sym2int.pl "$lang/phones.txt" | \
    steps/chain/e2e/compute_biphone_stats.py "$lang" >"$dir/phone-stats.txt" || exit 1;
  tie_opts="--min-biphone-count=$min_biphone_count \
--min-monophone-count=$min_monophone_count --phone-counts=$dir/phone-stats.txt"
fi

if [ $stage -le 0 ]; then
  if [ -z "$treedir" ]; then
    echo "$0: Initializing $type system."
    # feat dim does not matter here. Just set it to 10
    $cmd $dir/log/init_${type}_mdl_tree.log \
      gmm-init-$type $tie_opts $ci_opt $shared_phones_opt $lang/topo 10 \
      $dir/0.mdl $dir/tree || exit 1;
  else
    echo "$0: Copied tree/mdl from $treedir." >"$dir/log/init_mdl_tree.log"
    cp "$treedir/final.mdl" "$dir/0.mdl" || exit 1;
    cp "$treedir/tree" "$dir" || exit 1;
  fi
  copy-transition-model "$dir/0.mdl" "$dir/0.trans_mdl" || exit 1;
  # -f: replace a final.mdl link left by a previous run, so that re-running
  # this stage does not fail (and make the whole script return non-zero).
  ln -sf 0.mdl "$dir/final.mdl" || exit 1; # for consistency with scripts which require a final.mdl
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#!/bin/bash
# Copyright (c) Yiwen Shao

# Apache 2.0

# Feature preparation for the train and validation sets:
#   stage 0: extract MFCCs (conf/mfcc_hires.conf) and compute CMVN stats
#            for each set under $rootdir
#   stage 1: run dump.sh on each set's feats.scp/cmvn.ark, writing to
#            $dumpdir/<set>
# Independently of the stage, when train_subset_size > 0 a subset of the
# training set is selected and its feats.scp is written under $dumpdir.
# Every step is checked, so that a failure stops the script instead of
# letting later steps run on missing or partial data.

# data related
rootdir=data
dumpdir=data/dump # directory to dump full features

train_set=train_clean_5
valid_set=dev_clean_2

train_subset_size=0
stage=0

# feature configuration
do_delta=false

. ./path.sh || exit 1
. ./cmd.sh || exit 1
. ./utils/parse_options.sh || exit 1

if [ ${stage} -le 0 ]; then
  echo "Extracting MFCC features"
  for x in $train_set $valid_set; do
    steps/make_mfcc.sh --cmd "$train_cmd" --nj 20 \
      --mfcc-config conf/mfcc_hires.conf $rootdir/${x} || exit 1
    # compute global CMVN
    compute-cmvn-stats scp:$rootdir/${x}/feats.scp $rootdir/${x}/cmvn.ark || exit 1
  done
fi

train_feat_dir=${dumpdir}/${train_set}; mkdir -p ${train_feat_dir} || exit 1
valid_feat_dir=${dumpdir}/${valid_set}; mkdir -p ${valid_feat_dir} || exit 1

if [ ${stage} -le 1 ]; then
  echo "Dumping Features with CMVN"
  dump.sh --cmd "$train_cmd" --nj 32 --do_delta $do_delta \
    $rootdir/${train_set}/feats.scp $rootdir/${train_set}/cmvn.ark ${train_feat_dir}/log ${train_feat_dir} || exit 1
  dump.sh --cmd "$train_cmd" --nj 4 --do_delta $do_delta \
    $rootdir/${valid_set}/feats.scp $rootdir/${valid_set}/cmvn.ark ${valid_feat_dir}/log ${valid_feat_dir} || exit 1
fi

# randomly select a subset of train set for optional diagnosis
if [ "$train_subset_size" -gt 0 ]; then
  train_subset_feat_dir=${dumpdir}/${train_set}_${train_subset_size}; mkdir -p ${train_subset_feat_dir} || exit 1
  utils/subset_data_dir.sh $rootdir/${train_set} ${train_subset_size} $rootdir/${train_set}_${train_subset_size} || exit 1
  # Keep only the dumped features whose utterance ids are in the subset.
  utils/filter_scp.pl $rootdir/${train_set}_${train_subset_size}/utt2spk ${train_feat_dir}/feats.scp \
    > ${train_subset_feat_dir}/feats.scp || exit 1
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
#!/bin/bash
# Copyright (c) Yiwen Shao
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# Builds the graphs used for end-to-end chain training, all under
# $graphdir/${type}${unit}:
#   stage 0: copy $langdir/lang_nosp, write the chain topology and estimate
#            a phone LM for the denominator graph
#   stage 1: initialize tree/model with prepare_e2e.sh and make den.fst /
#            normalization.fst
#   stage 2: compile training graphs and numerator FSTs for the train and
#            validation sets
#   stage 3: build the HCLG graph with $langdir/lang_nosp_test_tgsmall

set -e -o pipefail

stage=0
train_set=train_clean_5
valid_set=dev_clean_2
rootdir=data

langdir=data/lang
graphdir=data/graph
type=mono
unit=phone

nj=10

# Scales passed to compile-train-graphs in stage 2. This variable used to be
# referenced without ever being set, so the binary silently ran with its own
# defaults. The value mirrors the default in prepare_e2e.sh and is also
# forwarded to that script so that both always agree.
scale_opts="--transition-scale=0.0 --self-loop-scale=0.0"

. ./path.sh
. ./cmd.sh
. ./utils/parse_options.sh

lang=$langdir/lang_${type}${unit}_e2e
graph=$graphdir/${type}${unit}

if [ $stage -le 0 ]; then
  echo "$0: Stage 0: Phone LM estimating"
  rm -rf "$lang"
  cp -r "$langdir/lang_nosp" "$lang"
  silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
  nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
  # Use our special topology... note that later on may have to tune this
  # topology.
  steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo

  echo "Estimating a phone language model for the denominator graph..."
  mkdir -p $graph/log
  # The pipes are escaped so that the whole pipeline is handed to $train_cmd
  # rather than being interpreted by this shell.
  $train_cmd $graph/log/make_phone_lm.log \
    cat $rootdir/$train_set/text \| \
    steps/nnet3/chain/e2e/text_to_phones.py --between-silprob 0.1 \
    $langdir/lang_nosp \| \
    utils/sym2int.pl -f 2- $langdir/lang_nosp/phones.txt \| \
    chain-est-phone-lm --num-extra-lm-states=2000 \
    ark:- $graph/phone_lm.fst
fi

if [ $stage -le 1 ]; then
  echo "$0: Stage 1: Graph generation..."
  if [ "$type" == "bi" ]; then
    type_arg=biphone # prepare_e2e.sh take either "mono" or "biphone" as arguments
  else
    type_arg=$type
  fi
  prepare_e2e.sh --nj $nj --cmd "$train_cmd" \
    --type $type_arg \
    --shared-phones true \
    --scale-opts "$scale_opts" \
    $rootdir/$train_set $lang $graph
  echo "Making denominator graph..."
  $train_cmd $graph/log/make_den_fst.log \
    chain-make-den-fst $graph/tree $graph/0.trans_mdl \
    $graph/phone_lm.fst \
    $graph/den.fst $graph/normalization.fst
fi

if [ $stage -le 2 ]; then
  echo "Making numerator graph..."
  lex=$lang/L.fst
  oov_sym=$(cat $lang/oov.int) || exit 1;
  for x in $train_set $valid_set; do
    sdata=$rootdir/$x/split$nj;
    # Re-split unless the split directory exists and is newer than feats.scp.
    [[ -d $sdata && $rootdir/$x/feats.scp -ot $sdata ]] || split_data.sh $rootdir/$x $nj || exit 1;
    $train_cmd JOB=1:$nj $graph/$x/log/compile_graphs.JOB.log \
      compile-train-graphs $scale_opts --read-disambig-syms=$lang/phones/disambig.int \
      $graph/tree $graph/0.mdl $lex \
      "ark:sym2int.pl --map-oov $oov_sym -f 2- $lang/words.txt < $sdata/JOB/text|" \
      "ark,scp:$graph/$x/fst.JOB.ark,$graph/$x/fst.JOB.scp" || exit 1;
    $train_cmd JOB=1:$nj $graph/$x/log/make_num_fst.JOB.log \
      chain-make-num-fst-e2e $graph/0.trans_mdl $graph/normalization.fst \
      scp:$graph/$x/fst.JOB.scp ark,scp:$graph/$x/num.JOB.ark,$graph/$x/num.JOB.scp
    # Merge the per-job index files into a single num.scp, in job order.
    for id in $(seq $nj); do cat $graph/$x/num.$id.scp; done > $graph/$x/num.scp
  done
fi

if [ $stage -le 3 ]; then
  echo "Making HCLG full graph..."
  utils/lang/check_phones_compatible.sh \
    $langdir/lang_nosp_test_tgsmall/phones.txt $lang/phones.txt
  utils/mkgraph.sh \
    --self-loop-scale 1.0 $langdir/lang_nosp_test_tgsmall \
    $graph $graph/graph_tgsmall || exit 1;
fi
Oops, something went wrong.