-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathconvertIndices.pl
More file actions
executable file
·86 lines (74 loc) · 2.13 KB
/
convertIndices.pl
File metadata and controls
executable file
·86 lines (74 loc) · 2.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/perl
use HTML::TableExtract;
# Return a copy of the given string with every leading and trailing run of
# whitespace and Unicode format characters (\p{FORMAT}) removed.
# The caller's value is left untouched: the substitution uses /r, which
# works on a copy and returns the result.
sub trim {
    my ($text) = @_;
    my $stripped = $text =~ s/^[\p{FORMAT}\s]+|[\p{FORMAT}\s]+$//rg;
    return $stripped;
}
# ---------------------------------------------------------------------------
# Build target/index.html from the downloaded EWD index pages.
#
# Inputs:
#   sources/www.cs.utexas.edu/~EWD/index??xx.html  index pages that are scanned
#   meta/languages.tsv                             "<name><TAB><language>" lines
# Output:
#   target/index.html   one <li> per collected transcription, written in the
#                       reverse of the order in which they were collected
#
# Dies if the language table, an index page, or the output file cannot be
# opened, or if the output file cannot be flushed on close.
# ---------------------------------------------------------------------------

# Both pragmas are lexically scoped, so enabling them here checks everything
# from this point to the end of the file.
use strict;
use warnings;

my @files = glob('sources/www.cs.utexas.edu/~EWD/index??xx.html');
my @ewds  = ();

# Map "<name>.html" => language tag, taken from the two TSV columns.
my %language = ();
open my $lang_fh, '<', 'meta/languages.tsv'
    or die "Couldn't open meta/languages.tsv: $!";
while (my $line = <$lang_fh>) {
    # Lines without a tab (e.g. blank lines) are skipped; they must not end
    # the loop, otherwise every mapping after them would be lost.
    next unless $line =~ /(.*)\t(.*)/;
    $language{"$1.html"} = $2;
}
close $lang_fh;

# Transcription files that must never be listed.
my %skip = (
    'EWD645.html' => 1,    # missing/empty upstream
);

foreach my $index_file (@files) {
    # Same :utf8 layer the script has always used for the index pages.
    open my $in_fh, '<:utf8', $index_file
        or die "Couldn't open file $index_file: $!";
    # Slurp the whole page; $/ is only undefined inside this do-block.
    my $content = do { local $/; <$in_fh> };
    close $in_fh;

    my $te = HTML::TableExtract->new(
        headers => ['EWD number', 'Title', 'transcriptions'],
    );
    $te->parse($content);

    foreach my $ts ($te->tables) {
        foreach my $row ($ts->rows) {
            # Column order follows the headers requested above:
            # 0 = EWD number, 1 = title, 2 = transcription cell.
            my $nr       = trim($row->[0]) // '';
            my $title    = trim($row->[1]) // '';
            my $ewd_file = trim($row->[2]) // '';

            # Keep everything up to the last ".htm"/".html" (any case) and
            # normalise the extension to ".html"; whatever follows is dropped.
            $ewd_file =~ s/(.*)\.html?.*/${1}.html/mis;
            $ewd_file =~ s/\s//g;
            $ewd_file =~ s/ewd/EWD/;

            $title =~ s/ \(English\)//;

            # Rows without a transcription file, and explicitly skipped
            # files, do not appear in the index.
            next if $ewd_file =~ /^\s*$/;
            next if $skip{$ewd_file};

            push @ewds, {
                nr       => $nr,
                title    => $title,
                file     => $ewd_file,
                language => $language{$ewd_file},
            };
        }
    }
}

# Escape the characters that are special in HTML text and in quoted
# attribute values, so scraped text cannot break the generated markup.
sub escape_html {
    my ($text) = @_;
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&#39;',
    );
    $text =~ s/([&<>"'])/$entity_for{$1}/g;
    return $text;
}

open my $index_fh, '>:utf8', 'target/index.html'
    or die "Could not open index: $!";
print {$index_fh} <<'EOF';
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<link href="https://fonts.googleapis.com/css?family=Lobster|Raleway" rel="stylesheet">
<link href="assets/common.css" rel="stylesheet">
<link href="assets/index.css" rel="stylesheet">
<script src="assets/index.js"></script>
</head>
<body>
<div class="metabar">
<div class="metabar-inner">
<a id="nl" class="lang" href="#" onclick="disableLang('nl')">NL</a>
<a id="nonl" class="inactive" href="#" onclick="enableLang('nl')">NL</a>
<a id="en" class="lang" href="#" onclick="disableLang('en')">EN</a>
<a id="noen" class="inactive" href="#" onclick="enableLang('en')">EN</a>
</div>
</div>
<h1>Edsger W. Dijkstra</h1>
<div class="body">
<ul>
EOF
foreach my $ewd (reverse @ewds) {
    # Files without an entry in the language table get an empty language class.
    my $lang  = escape_html($ewd->{language} // '');
    my $href  = escape_html($ewd->{file});
    my $title = escape_html($ewd->{title});
    print {$index_fh} "<li class='ewd $lang'><a href='$href'>$title</a></li>\n";
}
print {$index_fh} <<'EOF';
</ul>
</div>
</body>
EOF
# Buffered write errors only surface at close, so check it.
close $index_fh or die "Could not write index: $!";