-
Notifications
You must be signed in to change notification settings - Fork 2
/
translit
executable file
·244 lines (162 loc) · 5.2 KB
/
translit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
#!/usr/bin/perl -w
#
# Copyright (C) 2007-2008 ...
# Alex Linke <[email protected]>
# Rona Linke <[email protected]>
# Copyright (C) 2009-2014 Lingua-Systems Software GmbH
#
use strict;
use Getopt::Long;
require 5.008;
use Lingua::Translit;
use Lingua::Translit::Tables qw/:all/;
my $VERSION = '0.4';
=pod
=head1 NAME
translit - transliterate text between various writing systems
=head1 SYNOPSIS
translit B<-t> I<name> B<-r> B<-i> I<file> B<-o> I<file> -l -v -h
=head1 DESCRIPTION
B<translit> transliterates B<UTF-8> encoded natural language text from one
writing system to another.
It utilizes the Perl module L<Lingua::Translit> to accomplish this task and
follows the provided transliteration standards as strictly as possible.
B<translit> recognizes the following command line switches:
=over 4
=item B<--trans>, B<-t> I<name>
Use the transliteration standard named I<name>.
=item B<--reverse>, B<-r>
Transliterate in reverse direction.
B<NOTE>: Not every transliteration is capable of being reversed.
If unsure, have a look at the list of supported transliterations using the
B<--list> switch.
=item B<--infile>, B<-i> I<FILE>
Read text from I<FILE>. Defaults to STDIN.
=item B<--outfile>, B<-o> I<FILE>
Write the transliterated text to I<FILE>. Defaults to STDOUT.
=item B<--list>, B<-l>
Print a list of supported transliterations showing their
=over 4
=item *
name
=item *
reversibility
=item *
description
=back
=item B<--verbose>, B<-v>
Print verbose status messages to STDERR.
=item B<--help>, B<-h>
Show a short help text introducing command line switches only.
=back
=cut
# Set defaults where possible
my %opt = (
    trans   => "",
    infile  => "",
    outfile => "",
    reverse => 0,
    list    => 0,
    verbose => 0,    # off
    help    => 0
);

# Parse the command line. GetOptions() returns false on unknown or malformed
# switches; in that case show the usage text and exit with status 1.
show_help(1)
  unless GetOptions(
    "trans|t=s"   => \$opt{trans},
    "infile|i=s"  => \$opt{infile},
    "outfile|o=s" => \$opt{outfile},
    "reverse|r"   => \$opt{reverse},
    "list|l"      => \$opt{list},
    "verbose|v"   => \$opt{verbose},
    "help|h"      => \$opt{help}
  );

# --help and --list terminate the program; a transliteration name is
# mandatory for everything else.
show_help(0) if $opt{help};
show_list()  if $opt{list};
show_help(1) unless $opt{trans};

# Assure the requested transliteration is supported...
die "$opt{trans} is not supported.\n" unless translit_supported( $opt{trans} );

# ...and reverse transliteration is supported, too - if requested
die "$opt{trans} cannot be reversed.\n"
  if ( $opt{reverse} && !translit_reverse_supported( $opt{trans} ) );

# If no input file was specified, use STDIN as a fallback. This way, translit
# may also be used in pipes!
#
# The file is opened with three-argument open() and a lexical handle so that
# characters in the file name can never be interpreted as an open mode or a
# command. The two-argument form used previously treated "-" as STDIN, which
# is kept here explicitly.
my $in;
my $in_name = "STDIN";    # label used in error messages
if ( $opt{infile} && $opt{infile} ne "-" ) {
    $in_name = $opt{infile};
    print STDERR "Reading input from $opt{infile}...\n" if $opt{verbose};
    open( $in, '<', $opt{infile} ) or die "$opt{infile}: $!\n";
}
else {
    print STDERR "Reading input from STDIN...\n" if $opt{verbose};
    $in = \*STDIN;
}

# Slurp in all the input and close the filehandle. $/ is only undefined
# inside the do-block, so the rest of the program keeps line-wise reads.
# This happens BEFORE the output file is opened (and thereby truncated), so
# using the same file for --infile and --outfile does not destroy the input.
my $text = do { local $/; <$in> };

# In slurp mode an empty input yields "", so undef signals a read error
# (e.g. the input path is a directory).
die "$in_name: $!\n" unless defined $text;
close($in);

# If no output file was specified, use STDOUT as a fallback ("-" selects
# STDOUT as well, matching the behaviour of the former two-argument open).
my $out;
my $out_name = "STDOUT";    # label used in error messages
if ( $opt{outfile} && $opt{outfile} ne "-" ) {
    $out_name = $opt{outfile};
    print STDERR "Writing output to $opt{outfile}...\n" if $opt{verbose};
    open( $out, '>', $opt{outfile} ) or die "$opt{outfile}: $!\n";
}
else {
    print STDERR "Writing output to STDOUT...\n" if $opt{verbose};
    $out = \*STDOUT;
}

# Transliterate
my $tr = Lingua::Translit->new( $opt{trans} );
my $text_tr;

if ( $opt{reverse} ) {
    print STDERR "Transliterating according to ", $tr->name(), " (reverse)...\n"
      if $opt{verbose};
    $text_tr = $tr->translit_reverse($text);
}
else {
    print STDERR "Transliterating according to ", $tr->name(), "...\n"
      if $opt{verbose};
    $text_tr = $tr->translit($text);
}

# Write the transliterated text to the filehandle and close it. Both steps
# are checked: buffered write errors (disk full, closed pipe, ...) often only
# surface when the handle is closed.
print {$out} $text_tr or die "$out_name: $!\n";
close($out) or die "$out_name: $!\n";
# Print the version banner and a short summary of the command line switches,
# then terminate the program.
#
# Argument: the exit status to terminate with.
#
# Help that was explicitly requested (status 0) is written to STDOUT. Usage
# shown because of an error (non-zero status) is written to STDERR, so it
# does not end up in piped or redirected output.
#
# This sub never returns.
sub show_help {
    my ($retval) = @_;

    my $fh = $retval ? \*STDERR : \*STDOUT;

    print {$fh} "translit v$VERSION -- ",
      "(c) 2009-2014 Lingua-Systems Software GmbH\n\n",
      "usage: $0 -i FILE -o FILE -t NAME -r -l -v -h\n\n",
      " --infile -i FILE read input from FILE\n",
      " --outfile -o FILE write output to FILE\n",
      " --trans -t NAME use transliteration NAME\n",
      " --reverse -r transliterate in reverse direction\n",
      " --list -l list all supported transliterations\n\n",
      " --verbose -v print verbose status messages\n",
      " --help -h show this help\n\n",
      "Read translit(1) for details.\n";

    exit($retval);
}
# Print a heading that names the installed Lingua::Translit version, call
# translit_list_supported() to emit the list of transliterations, and exit
# with status 0. This sub never returns.
sub show_list {
    my $module_version = $Lingua::Translit::VERSION;

    print "Transliterations supported by Lingua::Translit v",
      $module_version, ":\n\n";

    translit_list_supported();

    exit(0);
}
=head1 RESTRICTIONS
The input has to be UTF-8 encoded.
=head1 BUGS
None known.
Please report bugs to [email protected].
=head1 SEE ALSO
L<Lingua::Translit>, L<Lingua::Translit::Tables>
=head1 AUTHORS
Alex Linke <[email protected]>
Rona Linke <[email protected]>
=head1 LICENSE AND COPYRIGHT
Copyright (C) 2007-2008 Alex Linke and Rona Linke
Copyright (C) 2009-2014 Lingua-Systems Software GmbH
This program is free software. It may be used, redistributed
and/or modified under the terms of either the GPL v2 or the
Artistic license.
=cut
# vim: sts=4 sw=4 ts=4 ai et