-
Notifications
You must be signed in to change notification settings - Fork 0
/
score-ref.pl
executable file
·75 lines (66 loc) · 2.33 KB
/
score-ref.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/perl
#
# Automatically score segments in parallel files containing translations of
# original and corrected segments with respect to a parallel reference file
# containing the corresponding reference segments.
#
# For each triple (reference, original, corrected), the script outputs whether
# the score of the corrected segment was better than, equal to, or worse than
# that of the original segment with respect to the reference translation,
# followed by the score of the original segment and then the score of the
# corrected segment (tab-separated, one line per triple).
# This results in a parallel scoring file.
#
# The scorer can be any process that takes a reference and a candidate
# segment on stdin (each on a line), and returns the score as a number
# on stdout. Use the "--invert" option to treat higher score numbers as
# worse, which is e.g. the case with TER.
use warnings;
use FileHandle;
use IPC::Open3;
# Five positional arguments are mandatory (scorer command, original file,
# corrected file, score file, reference file).  With fewer than that, print
# the usage line to standard output and stop with exit status -1.
unless (@ARGV >= 5) {
    print "$0 <scorer-cmd-with-args> <origfile> <correctedfile> <scorefile> <reffile> [--invert]\n";
    exit -1;
}
# Return a copy of the given string with its trailing line terminator and all
# leading and trailing whitespace removed.  The argument is copied first, so
# the caller's variable is left unchanged.
sub trim {
    my ($text) = @_;
    chomp $text;
    for ($text) {
        s/^\s+//;    # drop leading whitespace
        s/\s+$//;    # drop trailing whitespace
    }
    return $text;
}
# ---------------------------------------------------------------------------
# Main program.
#
# Positional arguments:
#   $ARGV[0]  scorer command line (started once and kept running)
#   $ARGV[1]  file with the original segments
#   $ARGV[2]  file with the corrected segments
#   $ARGV[3]  score file to write (overwritten)
#   $ARGV[4]  file with the reference segments
#   $ARGV[5]  optional; when present, lower scores count as better
#
# For every line of the corrected file, the matching lines of the original and
# reference files are read.  The scorer is sent "reference\ncandidate\n" twice
# (once per candidate) and must answer each pair with one line holding the
# score.  One line "<better|equal|worse>\t<orig score>\t<corrected score>" is
# written to the score file per input line.
#
# Dies when a file cannot be opened, when the original or reference file is
# shorter than the corrected file, when the scorer stops answering, or when
# the score file cannot be flushed to disk.
# ---------------------------------------------------------------------------
my $scorer_cmd     = $ARGV[0];
my $orig_path      = $ARGV[1];
my $corrected_path = $ARGV[2];
my $score_path     = $ARGV[3];
my $ref_path       = $ARGV[4];

# Any sixth argument (documented as "--invert") flips the comparison so that
# lower scores count as better.  Multiplying both sides by -1 reverses the
# ordering without duplicating the comparison logic.
my $compare_factor = (scalar(@ARGV) >= 6) ? -1.0 : 1.0;

# Start the scorer.  Its stderr is sent straight to our own STDERR instead of
# into a pipe nobody reads; an unread pipe would eventually fill up and block
# the scorer (and therefore this script) forever.
my $scorer_pid = IPC::Open3::open3(my $scorer_in, my $scorer_out, '>&STDERR', $scorer_cmd);

# Three-argument open: file names are taken literally, never parsed for a mode.
open(my $orig_fh, '<', $orig_path)
    or die "Cannot open $orig_path for reading: $!\n";
open(my $corrected_fh, '<', $corrected_path)
    or die "Cannot open $corrected_path for reading: $!\n";
open(my $score_fh, '>', $score_path)
    or die "Cannot open $score_path for writing: $!\n";
open(my $ref_fh, '<', $ref_path)
    or die "Cannot open $ref_path for reading: $!\n";

my $line_no = 0;
while (defined(my $corrected = <$corrected_fh>)) {
    $line_no++;
    $corrected = trim($corrected);

    # Test definedness, not truth: a final line "0" without a newline is a
    # valid segment and must not be mistaken for end of file.
    my $orig = <$orig_fh>;
    defined $orig or die "$orig_path is shorter than $corrected_path!";
    $orig = trim($orig);

    my $ref = <$ref_fh>;
    defined $ref or die "$ref_path is shorter than $corrected_path!";
    $ref = trim($ref);

    # Score the original segment against the reference.
    print {$scorer_in} "$ref\n$orig\n";
    my $score_orig = <$scorer_out>;
    defined $score_orig
        or die "Scorer '$scorer_cmd' returned no score for the original segment on line $line_no\n";
    chomp $score_orig;

    # Score the corrected segment against the same reference.
    print {$scorer_in} "$ref\n$corrected\n";
    my $score_corrected = <$scorer_out>;
    defined $score_corrected
        or die "Scorer '$scorer_cmd' returned no score for the corrected segment on line $line_no\n";
    chomp $score_corrected;

    # Decide the verdict afresh for every line so that a value from a
    # previous iteration can never leak into this one.
    my $compare;
    if ($score_corrected == $score_orig) {
        $compare = "equal";
    }
    elsif ($score_corrected * $compare_factor > $score_orig * $compare_factor) {
        $compare = "better";
    }
    else {
        $compare = "worse";
    }

    printf {$score_fh} "%s\t%s\t%s\n", $compare, $score_orig, $score_corrected;
}

close $corrected_fh;
close $orig_fh;
close $ref_fh;

# Buffered write errors (e.g. disk full) only surface when the handle is closed.
close $score_fh or die "Cannot finish writing $score_path: $!\n";

# Closing the scorer's stdin signals end of input; afterwards reap the child
# so that it does not linger as a zombie process.
close $scorer_in;
close $scorer_out;
waitpid($scorer_pid, 0);