forked from jniehues-kit/SLT.KIT
-
Notifications
You must be signed in to change notification settings - Fork 14
/
Train.sh
executable file
·147 lines (95 loc) · 5.19 KB
/
Train.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/bin/bash
# Settings for the how2 English -> Portuguese experiment.
# IWSLTDIR and SLTKITDIR are used below but never assigned in this script;
# they are expected to come from ../Config.sh or the calling environment.
. ../Config.sh

# Experiment identity and language pair.
systemName=how2
sl=en
tl=pt

# Working directory of the experiment.
BASEDIR=${IWSLTDIR}/how2/

# Values exported for the preprocessing and training scripts invoked later.
BPESIZE=10000
LAYER=12
TRANSFORMER=stochastic_transformer
OPTIM=Adam
LR=0.001

export systemName sl tl BASEDIR BPESIZE LAYER TRANSFORMER OPTIM LR

# Show the resolved working directory.
echo $BASEDIR
############## MT #############################
mkdir -p "$BASEDIR/data/orig/"
# Data layout: nothing is downloaded here. The how2-300h-v1 release must
# already be present under data/orig/; this section only creates symlinks
# into it (*.s = English text, *.t = Portuguese text).
# Every cd is checked: if one failed, the relative symlinks below would be
# created in whatever directory the shell happened to be in.
cd "$BASEDIR/data/orig/" || exit 1
mkdir -p parallel valid

# Training text.
cd parallel || exit 1
ln -s ../how2-300h-v1/data/train/text.pt how2.t
ln -s ../how2-300h-v1/data/train/text.en how2.s

# Validation text.
cd ../valid || exit 1
ln -s ../how2-300h-v1/data/val/text.pt how2-val.t
ln -s ../how2-300h-v1/data/val/text.en how2-val.s

# dev5 evaluation text.
cd .. || exit 1
mkdir -p eval/dev5
cd eval/dev5 || exit 1
ln -s ../../how2-300h-v1/data/dev5/text.pt dev5.pt
ln -s ../../how2-300h-v1/data/dev5/text.en dev5.en

# Preprocess data/orig into data/prepro, then train the text MT model "mt".
# The shell is still inside data/orig/eval/dev5 at this point, as before.
"$SLTKITDIR/scripts/defaultPreprocessor/Train.sh" orig prepro
"$SLTKITDIR/scripts/NMTGMinor/Train.sh" prepro mt
############## ASR #############################
export ENC_LAYER=32

# Link the feats.scp files of the training and validation sets next to the
# preprocessed data. The directory variable is BASEDIR (it was previously
# misspelled, which made cd target /data/prepro/... and left the links in
# the wrong directory). cd is checked for the same reason.
cd "$BASEDIR/data/prepro/train" || exit 1
ln -s ../../orig/how2-300h-v1/data/train/feats.scp how2.scp
cd - || exit 1

cd "$BASEDIR/data/prepro/valid" || exit 1
ln -s ../../orig/how2-300h-v1/data/val/feats.scp how2-val.scp
cd - || exit 1

# Train the "asr" model, then continue training it as "asr.cont".
# NOTE(review): the trailing "s" presumably selects the source-language side
# as the output of the speech model -- confirm against Train.speech.sh.
"$SLTKITDIR/scripts/NMTGMinor/Train.speech.sh" prepro asr s
"$SLTKITDIR/scripts/NMTGMinor/Cont.speech.sh" prepro asr asr.cont s
############## SLT #############################
# Encoder depth exported for the speech training scripts below.
ENC_LAYER=32
export ENC_LAYER

# Directory holding the NMTGMinor wrapper scripts.
slt_scripts=$SLTKITDIR/scripts/NMTGMinor

# Initial run producing model "slt" ...
$slt_scripts/Train.speech.sh prepro slt t
# ... followed by two chained continuation runs: slt -> slt.cont -> slt.cont2.
$slt_scripts/Cont.speech.sh prepro slt slt.cont t
$slt_scripts/Cont.speech.sh prepro slt.cont slt.cont2 t
############## Evaluation #############################
# Decodes each test set with the MT, ASR, cascaded (ASR output fed to MT) and
# SLT models, once with the current beam setting and once with BEAMSIZE=8,
# then scores translations with nmtpy-coco-metrics and transcripts with sclite.

# Tool and reference locations. Defaults are the locations this script used
# before; set the variables in the environment to override them.
COCO_METRICS=${COCO_METRICS:-$HOME/.local/bin/nmtpy-coco-metrics}
SCLITE=${SCLITE:-$HOME/opt/sctk-2.4.10/bin/sclite}
# File whose last column is appended to every ASR hypothesis line before
# scoring (sclite is called with "-i spu_id"). The default is dev5-specific.
SCLITE_ID_FILE=${SCLITE_ID_FILE:-/home/dx294494/opt/how2-dataset/eval/asr/hyp.filtered.word.wer.r9216e.max150.dev5.beam10.sclite}

# Filter stdin -> stdout: join BPE pieces ("@@ " and a trailing "@@") and turn
# the HTML-entity escapes back into the literal characters.
# "&amp;" is handled last so that e.g. "&amp;lt;" ends up as "&lt;", and the
# "&" in its replacement is escaped because a bare "&" means "the whole match".
restore_text() {
  sed -e 's/@@ //g' -e 's/@@$//g' \
    -e "s/&apos;/'/g" -e 's/&#124;/|/g' \
    -e 's/&lt;/</g' -e 's/&gt;/>/g' \
    -e 's/&quot;/"/g' -e 's/&#91;/[/g' -e 's/&#93;/]/g' \
    -e 's/&amp;/\&/g'
}

# Post-process one translation hypothesis and score it.
# Arguments: $1 - path prefix; reads "${1}t", writes "${1}pt"
#            $2 - reference translation file
# Outputs:   whatever the metrics tool prints
score_translation() {
  restore_text < "${1}t" | perl -nle 'print ucfirst' > "${1}pt"
  "$COCO_METRICS" "${1}pt" -r "$2"
}

# NOTE(review): BEAMSIZE stays exported as 8 after the first pass of this loop;
# if more test sets are added, the "default beam" half of later passes will
# also run with BEAMSIZE=8 unless it is reset.
for tst in dev5; do
  ### Decoding with the beam setting inherited from the environment.
  "$SLTKITDIR/scripts/defaultPreprocessor/Translate.sh" "$tst" prepro
  "$SLTKITDIR/scripts/NMTGMinor/Translate.sh" "manualTranscript.$tst" prepro mt
  "$SLTKITDIR/scripts/NMTGMinor/Translate.speech.sh" "$tst" prepro asr
  "$SLTKITDIR/scripts/NMTGMinor/Translate.speech.sh" "$tst" prepro asr.cont
  # Cascade: copy each ASR output (.t) to the .s name handed to Translate.sh.
  cp "$BASEDIR/data/asr/eval/$tst.t" "$BASEDIR/data/asr/eval/$tst.s"
  "$SLTKITDIR/scripts/NMTGMinor/Translate.sh" "$tst" asr mt
  cp "$BASEDIR/data/asr.cont/eval/$tst.t" "$BASEDIR/data/asr.cont/eval/cont.$tst.s"
  "$SLTKITDIR/scripts/NMTGMinor/Translate.sh" "cont.$tst" asr.cont mt
  "$SLTKITDIR/scripts/NMTGMinor/Translate.speech.sh" "$tst" prepro slt

  ### Same again with beam size 8; outputs carry a ".beam8" name component.
  export BEAMSIZE=8
  "$SLTKITDIR/scripts/NMTGMinor/Translate.sh" "manualTranscript.$tst" prepro mt
  "$SLTKITDIR/scripts/NMTGMinor/Translate.speech.sh" "$tst" prepro asr
  "$SLTKITDIR/scripts/NMTGMinor/Translate.speech.sh" "$tst" prepro asr.cont
  cp "$BASEDIR/data/asr/eval/$tst.beam$BEAMSIZE.t" "$BASEDIR/data/asr/eval/$tst.beam$BEAMSIZE.s"
  "$SLTKITDIR/scripts/NMTGMinor/Translate.sh" "$tst.beam$BEAMSIZE" asr mt
  cp "$BASEDIR/data/asr.cont/eval/$tst.beam$BEAMSIZE.t" "$BASEDIR/data/asr.cont/eval/cont.$tst.beam$BEAMSIZE.s"
  "$SLTKITDIR/scripts/NMTGMinor/Translate.sh" "cont.$tst.beam$BEAMSIZE" asr.cont mt
  "$SLTKITDIR/scripts/NMTGMinor/Translate.speech.sh" "$tst" prepro slt
  "$SLTKITDIR/scripts/NMTGMinor/Translate.speech.sh" "$tst" prepro slt.cont
  "$SLTKITDIR/scripts/NMTGMinor/Translate.speech.sh" "$tst" prepro slt.cont2

  ### Scoring.
  # NOTE(review): slt.cont2 is decoded above but never scored, and slt.cont is
  # only decoded after BEAMSIZE=8 is set although it is scored for both beam
  # settings -- confirm whether that is intended.
  ref=$BASEDIR/data/prepro/eval/manualTranscript.$tst.pt
  for beam in ".beam${BEAMSIZE}." .; do
    # MT on the manual transcript.
    score_translation "$BASEDIR/data/mt/eval/manualTranscript.${tst}${beam}" "$ref"

    for out in mt slt slt.cont; do
      # The cascaded MT output was produced from an input that already had a
      # ".beamN" name, so its file name carries the component twice. Use a
      # per-model suffix instead of overwriting $beam, which previously leaked
      # the doubled suffix into the slt, slt.cont and ASR scoring below.
      suffix=$beam
      if [[ "$beam" == ".beam${BEAMSIZE}." && "$out" == "mt" ]]; then
        suffix=".beam${BEAMSIZE}.beam${BEAMSIZE}."
      fi
      echo "$suffix"
      score_translation "$BASEDIR/data/$out/eval/${tst}${suffix}" "$ref"
      if [[ "$out" == "mt" ]]; then
        # Cascade that started from the asr.cont transcript.
        score_translation "$BASEDIR/data/$out/eval/cont.${tst}${suffix}" "$ref"
      fi
    done

    ###Eval ASR
    for asr in asr asr.cont; do
      asr_out=$BASEDIR/data/$asr/eval/${tst}${beam}
      # Normalise the transcript: attach " '" to the preceding word, strip
      # punctuation, lowercase.
      # NOTE(review): only the first "." on each line is removed (no "g"
      # flag), unlike the other punctuation marks -- kept as it was.
      restore_text < "${asr_out}t" \
        | sed -e "s/ '/'/g" -e 's/\.//' -e 's/,//g' -e 's/!//g' -e 's/?//g' \
        | perl -nle 'print lc' > "${asr_out}asr"
      # Append the last column of the id file to every hypothesis line.
      awk '{print $NF}' "$SCLITE_ID_FILE" \
        | paste "${asr_out}asr" - > "${asr_out}sclite"
      echo "Model $asr Beam $beam"
      # Print field 11 of sclite's Sum/Avg summary line.
      "$SCLITE" -r "$BASEDIR/data/orig/eval/$tst/$tst.filtered.en" \
        -h "${asr_out}sclite" -i spu_id -f 0 -o sum stdout dtl pra \
        | grep Sum/Avg | awk '{print $11}'
    done
  done
done