add mini_librispeech example
YiwenShaoStephen committed Aug 26, 2020
1 parent 79c1b7c commit f2ccc3b
Showing 15 changed files with 511 additions and 0 deletions.
21 changes: 21 additions & 0 deletions examples/mini_librispeech/cmd.sh
@@ -0,0 +1,21 @@
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

#export train_cmd="run.pl --mem 4G"
#export cuda_cmd="run.pl --mem 4G --gpu 1"
#export decode_cmd="run.pl --mem 4G"
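
# A SLURM setup would look similar (illustrative only; the memory limits and
# any partition options are assumptions to adapt to your cluster):
#export train_cmd="slurm.pl --mem 4G"
#export cuda_cmd="slurm.pl --mem 4G --gpu 1"
#export decode_cmd="slurm.pl --mem 4G"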

# JHU setup
export train_cmd="queue.pl --mem 4G"
#export cuda_cmd="queue.pl --mem 4G --gpu 1 --config conf/gpu.conf"
export cuda_cmd="queue-freegpu.pl --mem 4G --gpu 1 --config conf/gpu.conf"
export decode_cmd="queue.pl --mem 4G"
10 changes: 10 additions & 0 deletions examples/mini_librispeech/conf/mfcc_hires.conf
@@ -0,0 +1,10 @@
# config for high-resolution MFCC features, intended for neural network training
# Note: we keep all cepstra, so the features carry the same information as
# filterbank features, but MFCCs are more easily compressible (the
# coefficients are less correlated), which is why we prefer them.
--use-energy=false # use average of log energy, not energy.
--num-mel-bins=40 # similar to Google's setup.
--num-ceps=40 # there is no dimensionality reduction.
--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so
# there might be some information at the low end.
--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
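
# For reference, a config like this is passed to Kaldi's feature extraction
# via the generic --config option, e.g. (paths are illustrative):
#   compute-mfcc-feats --config=conf/mfcc_hires.conf scp:data/train/wav.scp ark:-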
1 change: 1 addition & 0 deletions examples/mini_librispeech/local/data_prep.sh
1 change: 1 addition & 0 deletions examples/mini_librispeech/local/download_and_untar.sh
1 change: 1 addition & 0 deletions examples/mini_librispeech/local/download_lm.sh
1 change: 1 addition & 0 deletions examples/mini_librispeech/local/format_lms.sh
1 change: 1 addition & 0 deletions examples/mini_librispeech/local/prepare_dict.sh
1 change: 1 addition & 0 deletions examples/mini_librispeech/local/score_kaldi_wer.sh
15 changes: 15 additions & 0 deletions examples/mini_librispeech/path.sh
@@ -0,0 +1,15 @@
MAIN_ROOT=$PWD/../..
KALDI_ROOT=$MAIN_ROOT/tools/kaldi

# BEGIN from kaldi path.sh
[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH
[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
. $KALDI_ROOT/tools/config/common_path.sh
export LC_ALL=C
# END

export PATH=$MAIN_ROOT:$MAIN_ROOT/tools:$PATH
export LD_LIBRARY_PATH=$MAIN_ROOT/tools/pychain/openfst/lib:$LD_LIBRARY_PATH
export PYTHONPATH=$MAIN_ROOT:$MAIN_ROOT/tools:$MAIN_ROOT/tools/pychain:$PYTHONPATH
export PYTHONUNBUFFERED=1
107 changes: 107 additions & 0 deletions examples/mini_librispeech/prepare_e2e.sh
@@ -0,0 +1,107 @@
#!/usr/bin/env bash
# Copyright 2017 Hossein Hadian
# 2020 Yiwen Shao
# Apache 2.0

# To be run from ..
# Flat start chain model training.

# This script initializes a trivial tree and transition model
# for flat-start chain training. (The training graphs themselves
# are compiled later, in prepare_graph.sh.)

# Begin configuration section.
cmd=run.pl
nj=4
stage=0
shared_phones=true
treedir= # If specified, the tree and model will be copied from there;
# note that the result may no longer be a flat start.
type=mono # Can be either mono or biphone -- either way
# the resulting tree is full (i.e. it doesn't do any tying)
ci_silence=false # If true, silence phones will be treated as context independent

scale_opts="--transition-scale=0.0 --self-loop-scale=0.0"
tie=false # If true, gmm-init-biphone will do some tying when
# creating the full biphone tree (it won't be full anymore).
# Specifically, it will revert to monophone if the data
# counts for a biphone are smaller than min_biphone_count.
# If the monophone count is also smaller than min_monophone_count,
# it will revert to a shared global phone. Note that this
# only affects biphone models (i.e., type=biphone) which
# use the special chain topology.
min_biphone_count=100
min_monophone_count=20
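
# Sketch of the count-based back-off described above (illustrative, not the
# exact gmm-init-biphone logic):
#   if   count(biphone)   >= min_biphone_count:   keep a dedicated biphone leaf
#   elif count(monophone) >= min_monophone_count: share the monophone leaf
#   else:                                         use a single shared global leaf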
# End configuration section.

echo "$0 $@" # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# != 3 ]; then
echo "Usage: steps/prepare_e2e.sh [options] <data-dir> <lang-dir> <tree-dir>"
echo " e.g.: steps/prepare_e2e.sh data/train data/lang_chain exp/chain/e2e_tree"
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --type <mono | biphone> # context dependency type"
echo " --tie <true | false> # enable/disable count-based tying"
exit 1;
fi

data=$1
lang=$2
dir=$3

if [[ "$type" != "mono" && "$type" != "biphone" ]]; then
echo "'type' should be either mono or biphone."
exit 1;
fi

mkdir -p $dir/log

echo $scale_opts > $dir/scale_opts # just for easier reference (it is in the logs too)
echo $nj > $dir/num_jobs
sdata=$data/split$nj;
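# Re-split the data only if the split dir is missing or older than feats.scp.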
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;

cp $lang/phones.txt $dir || exit 1;

[ ! -f $lang/phones/sets.int ] && echo >&2 "$0: expected $lang/phones/sets.int to exist" && exit 1;

if $shared_phones; then
shared_phones_opt="--shared-phones=$lang/phones/sets.int"
fi

ciphonelist=$(cat $lang/phones/context_indep.csl) || exit 1;
if $ci_silence; then
ci_opt="--ci-phones=$ciphonelist"
fi

tie_opts=
if $tie && [[ "$type" = "biphone" ]]; then
cat $data/text | steps/chain/e2e/text_to_phones.py --edge-silprob 0 \
--between-silprob 0 \
$lang | \
cut -d' ' -f 2- | utils/sym2int.pl $lang/phones.txt | \
steps/chain/e2e/compute_biphone_stats.py $lang >$dir/phone-stats.txt
tie_opts="--min-biphone-count=$min_biphone_count \
--min-monophone-count=$min_monophone_count --phone-counts=$dir/phone-stats.txt"
fi

if [ $stage -le 0 ]; then
if [ -z "$treedir" ]; then
echo "$0: Initializing $type system."
# feat dim does not matter here. Just set it to 10
$cmd $dir/log/init_${type}_mdl_tree.log \
gmm-init-$type $tie_opts $ci_opt $shared_phones_opt $lang/topo 10 \
$dir/0.mdl $dir/tree || exit 1;
else
echo "$0: Copied tree/mdl from $treedir." >$dir/log/init_mdl_tree.log
cp $treedir/final.mdl $dir/0.mdl || exit 1;
cp $treedir/tree $dir || exit 1;
fi
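# Write out the transition model on its own; later graph-building steps
# (e.g. chain-make-den-fst in prepare_graph.sh) read 0.trans_mdl.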
copy-transition-model $dir/0.mdl $dir/0.trans_mdl
ln -s 0.mdl $dir/final.mdl # for consistency with scripts which require a final.mdl
fi
52 changes: 52 additions & 0 deletions examples/mini_librispeech/prepare_feat.sh
@@ -0,0 +1,52 @@
#!/bin/bash
# Copyright (c) Yiwen Shao

# Apache 2.0

# data related
rootdir=data
dumpdir=data/dump # directory to dump full features

train_set=train_clean_5
valid_set=dev_clean_2

train_subset_size=0
stage=0

# feature configuration
do_delta=false

. ./path.sh
. ./cmd.sh
. ./utils/parse_options.sh


if [ ${stage} -le 0 ]; then
echo "Extracting MFCC features"
for x in $train_set $valid_set; do
steps/make_mfcc.sh --cmd "$train_cmd" --nj 20 \
--mfcc-config conf/mfcc_hires.conf $rootdir/${x}
# compute global CMVN
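# (the output is a plain filename, not an ark: wspecifier, so
# compute-cmvn-stats writes a single global stats file rather than
# per-utterance stats)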
compute-cmvn-stats scp:$rootdir/${x}/feats.scp $rootdir/${x}/cmvn.ark
done
fi

train_feat_dir=${dumpdir}/${train_set}; mkdir -p ${train_feat_dir}
valid_feat_dir=${dumpdir}/${valid_set}; mkdir -p ${valid_feat_dir}

if [ ${stage} -le 1 ]; then
echo "Dumping Features with CMVN"
dump.sh --cmd "$train_cmd" --nj 32 --do_delta $do_delta \
$rootdir/${train_set}/feats.scp $rootdir/${train_set}/cmvn.ark ${train_feat_dir}/log ${train_feat_dir}
dump.sh --cmd "$train_cmd" --nj 4 --do_delta $do_delta \
$rootdir/${valid_set}/feats.scp $rootdir/${valid_set}/cmvn.ark ${valid_feat_dir}/log ${valid_feat_dir}

fi

# randomly select a subset of train set for optional diagnosis
if [ $train_subset_size -gt 0 ]; then
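# subset_data_dir.sh picks the utterances; filter_scp.pl then restricts the
# already-dumped feats.scp to just those utterance ids.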
train_subset_feat_dir=${dumpdir}/${train_set}_${train_subset_size}; mkdir -p ${train_subset_feat_dir}
utils/subset_data_dir.sh $rootdir/${train_set} ${train_subset_size} $rootdir/${train_set}_${train_subset_size}
utils/filter_scp.pl $rootdir/${train_set}_${train_subset_size}/utt2spk ${train_feat_dir}/feats.scp \
> ${train_subset_feat_dir}/feats.scp
fi
94 changes: 94 additions & 0 deletions examples/mini_librispeech/prepare_graph.sh
@@ -0,0 +1,94 @@
#!/bin/bash
# Copyright (c) Yiwen Shao
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

set -e -o pipefail

stage=0
train_set=train_clean_5
valid_set=dev_clean_2
rootdir=data

langdir=data/lang
graphdir=data/graph
type=mono
unit=phone

nj=10

# compile-train-graphs (stage 2 below) needs these scales; flat-start chain
# training uses zero transition/self-loop scales (the same values set in
# prepare_e2e.sh).
scale_opts="--transition-scale=0.0 --self-loop-scale=0.0"

. ./path.sh
. ./cmd.sh
. ./utils/parse_options.sh

lang=$langdir/lang_${type}${unit}_e2e
graph=$graphdir/${type}${unit}

if [ $stage -le 0 ]; then
echo "$0: Stage 0: Phone LM estimating"
rm -rf $lang
cp -r $langdir/lang_nosp $lang
silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
# Use our special topology... note that later on may have to tune this
# topology.
steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo

echo "Estimating a phone language model for the denominator graph..."
mkdir -p $graph/log
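# Convert the word transcripts to phone sequences, map them to integer ids,
# and estimate a phone-level LM for the chain denominator; the
# --num-extra-lm-states option bounds the size of the resulting LM FST.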
$train_cmd $graph/log/make_phone_lm.log \
cat $rootdir/$train_set/text \| \
steps/nnet3/chain/e2e/text_to_phones.py --between-silprob 0.1 \
$langdir/lang_nosp \| \
utils/sym2int.pl -f 2- $langdir/lang_nosp/phones.txt \| \
chain-est-phone-lm --num-extra-lm-states=2000 \
ark:- $graph/phone_lm.fst
fi

if [ $stage -le 1 ]; then
echo "$0: Stage 1: Graph generation..."
if [ $type == "bi" ]; then
type_arg=biphone # prepare_e2e.sh takes either "mono" or "biphone" as its type argument
else
type_arg=$type
fi
prepare_e2e.sh --nj $nj --cmd "$train_cmd" \
--type $type_arg \
--shared-phones true \
$rootdir/$train_set $lang $graph
echo "Making denominator graph..."
$train_cmd $graph/log/make_den_fst.log \
chain-make-den-fst $graph/tree $graph/0.trans_mdl \
$graph/phone_lm.fst \
$graph/den.fst $graph/normalization.fst
fi


if [ $stage -le 2 ]; then
echo "Making numerator graph..."
lex=$lang/L.fst
oov_sym=$(cat $lang/oov.int) || exit 1;
for x in $train_set $valid_set; do
sdata=$rootdir/$x/split$nj;
[[ -d $sdata && $rootdir/$x/feats.scp -ot $sdata ]] || split_data.sh $rootdir/$x $nj || exit 1;
$train_cmd JOB=1:$nj $graph/$x/log/compile_graphs.JOB.log \
compile-train-graphs $scale_opts --read-disambig-syms=$lang/phones/disambig.int \
$graph/tree $graph/0.mdl $lex \
"ark:sym2int.pl --map-oov $oov_sym -f 2- $lang/words.txt < $sdata/JOB/text|" \
"ark,scp:$graph/$x/fst.JOB.ark,$graph/$x/fst.JOB.scp" || exit 1;
$train_cmd JOB=1:$nj $graph/$x/log/make_num_fst.JOB.log \
chain-make-num-fst-e2e $graph/0.trans_mdl $graph/normalization.fst \
scp:$graph/$x/fst.JOB.scp ark,scp:$graph/$x/num.JOB.ark,$graph/$x/num.JOB.scp
for id in $(seq $nj); do cat $graph/$x/num.$id.scp; done > $graph/$x/num.scp
done
fi

if [ $stage -le 3 ]; then
echo "Making HCLG full graph..."
utils/lang/check_phones_compatible.sh \
$langdir/lang_nosp_test_tgsmall/phones.txt $lang/phones.txt
utils/mkgraph.sh \
--self-loop-scale 1.0 $langdir/lang_nosp_test_tgsmall \
$graph $graph/graph_tgsmall || exit 1;
fi