-
Notifications
You must be signed in to change notification settings - Fork 0
/
score-lm.pl
executable file
·69 lines (60 loc) · 1.79 KB
/
score-lm.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/perl
#
# Score segments of parallel documents containing original and corrected
# segments using KenLM and a given language model.
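# Each line of the output file holds three tab-separated fields: the ranking
# of the corrected segment relative to the original one (better/worse/equal),
# the score of the original segment and the score of the corrected segment.
# The output file is thus line-aligned to the original and corrected files.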
use warnings;
use FileHandle;
use IPC::Open3;
# All four positional arguments are mandatory; when any of them is missing,
# print the usage line and leave with exit status -1.
unless (@ARGV >= 4) {
    print "$0 <origfile> <correctedfile> <scorefile> <language model file>\n";
    exit(-1);
}
# Extract the total score from one line of scorer output.
#
# Argument: one line as printed by the scorer; a trailing newline is allowed,
#           and undef (e.g. a read at end of input) is accepted as well.
# Returns:  the token following "Total:" when the line has the form
#           "... Total: <score> OOV: ...", otherwise the sentinel -1000.0.
sub get_score
{
    my $result = shift;

    # An undefined line is treated like any other unparsable line instead of
    # triggering "uninitialized value" warnings in chomp and the match below.
    return -1000.0 unless defined $result;

    chomp($result);
    if ($result =~ /Total:\s+(\S+)\s+OOV:/) {
        return $1;
    }

    # No recognisable total on this line.
    return -1000.0;
}
# Return a copy of the given string with its trailing input record separator
# removed and all whitespace stripped from both ends.
sub trim {
    my ($text) = @_;
    chomp($text);

    # Alias the copy to $_ so both substitutions apply to it directly.
    for ($text) {
        s/^\s+//;
        s/\s+$//;
    }

    return $text;
}
# Main script: send every original/corrected segment pair through the scorer
# and write one "<ranking>\t<score original>\t<score corrected>" line per pair
# to the score file.
my ($fnameO, $fnameC, $fnameS, $lm) = @ARGV;

# The scorer binary is looked up relative to the MOSES_DIR environment
# variable; fail early with a clear message when it is not available.
my $MOSES_DIR = $ENV{MOSES_DIR};
die "The MOSES_DIR environment variable is not set!\n"
    unless defined $MOSES_DIR && length $MOSES_DIR;
my $scorer = "$MOSES_DIR/lm/query $lm";

# Named globs are used for the scorer pipes because open3 needs an existing
# handle for the child's stderr to keep it apart from its stdout.
my $scorer_pid = IPC::Open3::open3(\*SCORERIN, \*SCOREROUT, \*SCORERERR, $scorer);

# Writing to a scorer that has gone away should surface as a failed print
# (reported below) rather than kill this script silently with SIGPIPE.
local $SIG{PIPE} = 'IGNORE';

# Three-argument open: file names are taken literally, and every failure is
# reported instead of silently producing an empty or partial run.
open(my $orig_fh, '<', $fnameO)
    or die "Cannot open $fnameO for reading: $!\n";
open(my $corrected_fh, '<', $fnameC)
    or die "Cannot open $fnameC for reading: $!\n";
open(my $score_fh, '>', $fnameS)
    or die "Cannot open $fnameS for writing: $!\n";

# Send one segment, wrapped in sentence markers, to the scorer and return the
# score parsed from its answer line (see get_score). Dies when the scorer can
# no longer be written to or has stopped answering.
sub _score_segment {
    my ($segment) = @_;
    print SCORERIN "<s> $segment </s>\n"
        or die "Cannot write to the scorer ($scorer): $!\n";
    my $response = <SCOREROUT>;
    die "The scorer ($scorer) stopped producing output!\n"
        unless defined $response;
    return get_score($response);
}

while (defined(my $corrected = <$corrected_fh>)) {
    $corrected = trim($corrected);

    # Test for definedness, not truth: a final line "0" without a newline is
    # a valid segment and must not be mistaken for end of file.
    my $orig = <$orig_fh>;
    die "$fnameO is shorter than $fnameC!" unless defined $orig;
    $orig = trim($orig);

    my $scoreorig      = _score_segment($orig);
    my $scorecorrected = _score_segment($corrected);

    # Exactly one ranking per pair; nothing is carried over from the
    # previous iteration.
    my $compare;
    if ($scorecorrected > $scoreorig) {
        $compare = "better";
    }
    elsif ($scorecorrected < $scoreorig) {
        $compare = "worse";
    }
    else {
        $compare = "equal";
    }

    printf {$score_fh} "%s\t%s\t%s\n", $compare, $scoreorig, $scorecorrected;
}

# Surplus original segments have no counterpart in the output; mention them
# without changing the result of the run.
warn "$fnameO is longer than $fnameC!\n" if defined(my $surplus = <$orig_fh>);

close $corrected_fh;
close $orig_fh;
# Buffered write errors only show up when the output file is closed.
close($score_fh) or die "Cannot finish writing $fnameS: $!\n";

# Close every pipe to the scorer before reaping it, so the child can neither
# wait for more input nor block on output nobody reads.
close SCORERIN;
close SCOREROUT;
close SCORERERR;
waitpid($scorer_pid, 0);