-
Notifications
You must be signed in to change notification settings - Fork 0
/
createstats.pl
executable file
·100 lines (89 loc) · 3.18 KB
/
createstats.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/perl
# From a given summary file (created by adjoin.pl) containing, for each flag,
# the flag info, the original/corrected/reference segments, and the scores of
# the various metrics, create a statistics file in tab-separated CSV format
# that indicates, for each flag type, each rule name, and each metric, in how
# many instances a suggestion of that rule has been applied, how often the
# translation improved according to the metric, how often it degraded, and
# how often it stayed the same.
use strict;
use warnings;

# Usage: <script> <summary-file> <outfile>
#
# Reads blank-line-separated records of "KEY<TAB>value" lines from
# <summary-file>, tallies per flag type / rule name / metric how often each
# verdict (first tab-separated field of the metric's line) occurred, derives
# an "AVG" pseudo-metric, and writes one tab-separated row per
# flag type / rule name / metric to <outfile>.
#
# Dies with a diagnostic if either file cannot be opened or the output cannot
# be written completely, instead of silently producing nothing.

if (scalar(@ARGV) < 2) {
    print "$0 <summary-file> <outfile>\n";
    exit -1;    # non-zero exit status signals the usage error
}
my $summary = shift;
my $outfile = shift;

# Three-argument open with lexical handles: the mode is explicit, so special
# characters in a file name can never be interpreted as an open mode.
open(my $in, '<', $summary)
    or die "$0: cannot open '$summary' for reading: $!\n";
open(my $out, '>', $outfile)
    or die "$0: cannot open '$outfile' for writing: $!\n";

# Metrics looked up in each input record.
my @input_metrics = ('LM-EN', 'LM-DE', 'LM-FR', 'BLEU1', 'BLEU2', 'GTM1', 'GTM2', 'TER1', 'TER2', 'HUMAN');
# Metrics written to the output, in this order ('AVG' is derived below).
my @output_metrics = ('LM-EN', 'LM-DE', 'LM-FR', 'BLEU1', 'BLEU2', 'GTM1', 'GTM2', 'TER1', 'TER2', 'AVG', 'HUMAN');
# Metrics that contribute to the derived 'AVG' pseudo-metric.
my @avg_metrics = ('BLEU1', 'BLEU2', 'GTM1', 'GTM2', 'TER1', 'TER2');
# A row is only written when its 'count' is at least this value.
my $count_threshold = 13;
# Only records whose FLAG line starts with one of these types are tallied.
my %is_tallied_flagtype = map { $_ => 1 } ('GRAMMAR', 'SPELLING', 'STYLE', 'TERM');

# $scores{flagtype}{rulename}{metric}{verdict or 'count'} = number of instances
my %scores;

# Adds one parsed record to the tallies.
#   $scores - reference to the tally hash described above (modified in place)
#   $record - reference to a hash mapping each line's key (text before the
#             first tab) to the rest of that line
# Records without a FLAG entry, or whose flag type is not tallied, are ignored.
sub tally_record {
    my ($scores, $record) = @_;

    my $info = $record->{'FLAG'};
    return unless defined $info;

    my ($flagtype, $rulename) = split(/\t/, $info);
    return unless defined $flagtype && $is_tallied_flagtype{$flagtype};
    # A FLAG line without a rule name is tallied under the empty rule name.
    $rulename = '' unless defined $rulename;

    foreach my $metric (@input_metrics) {
        my $scoreline = $record->{$metric};
        next unless defined $scoreline && $scoreline ne '';

        # The first field of the metric line is the verdict being counted.
        my ($verdict) = split(/\t/, $scoreline);
        $verdict = '' unless defined $verdict;

        $scores->{$flagtype}{$rulename}{$metric}{$verdict}++;
        $scores->{$flagtype}{$rulename}{$metric}{'count'}++;
    }
    return;
}

# Pass 1: read the summary file. An empty line terminates the current record.
my %record;
while (defined(my $line = <$in>)) {
    chomp($line);
    if ($line eq '') {
        tally_record(\%scores, \%record);
        %record = ();
        next;
    }
    if ($line =~ /^([^\t]*)\t(.*)$/) {
        $record{$1} = $2;
    }
}
# The last record may not be followed by an empty line.
tally_record(\%scores, \%record);

# Pass 2: derive 'AVG' as the per-column mean over those averaged metrics that
# actually occurred for the rule. Rules with none of them get no 'AVG' entry.
foreach my $flagtype (keys %scores) {
    foreach my $rule (values %{ $scores{$flagtype} }) {
        my @present = grep { exists $rule->{$_} } @avg_metrics;
        next unless @present;

        foreach my $column ('better', 'worse', 'equal', 'count') {
            my $sum = 0;
            foreach my $metric (@present) {
                # A verdict that never occurred for a metric counts as 0.
                $sum += ($rule->{$metric}{$column} || 0);
            }
            $rule->{'AVG'}{$column} = $sum / scalar(@present);
        }
    }
}

# Pass 3: write the statistics table.
print {$out} "flagtype\trule name\tflagtype for chart\trulename for chart\tmetric\tcount\tbetter\tworse\tequal\n";
foreach my $flagtype (sort keys %scores) {
    # The chart labels are emitted only on the first written row of each
    # flag type / rule and are blanked afterwards.
    my $flagtypeforchart = $flagtype;
    foreach my $rulename (sort keys %{ $scores{$flagtype} }) {
        (my $rulenameforchart = $rulename) =~ s/_/ /g;

        foreach my $metric (@output_metrics) {
            next unless exists $scores{$flagtype}{$rulename}{$metric};

            # Work on a copy so that filling in defaults leaves %scores alone.
            my %data = %{ $scores{$flagtype}{$rulename}{$metric} };
            next unless $data{'count'} >= $count_threshold;

            if ($rulenameforchart ne "") {
                $rulenameforchart .= " ($data{'count'})";
            }
            foreach my $column ('better', 'worse', 'equal') {
                if (!defined $data{$column} || $data{$column} eq "") {
                    $data{$column} = "0";
                }
            }
            print {$out} join("\t",
                $flagtype, $rulename, $flagtypeforchart, $rulenameforchart,
                $metric, $data{'count'}, $data{'better'}, $data{'worse'},
                $data{'equal'}), "\n";

            $flagtypeforchart = "";
            $rulenameforchart = "";
        }
    }
}

close($in);
# Buffered write errors (e.g. a full disk) only surface when the handle is closed.
close($out) or die "$0: error while writing '$outfile': $!\n";