-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtblcsort
executable file
·105 lines (89 loc) · 1.9 KB
/
tblcsort
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/perl
# tblcsort: sort columns of a CSV file
# Copyright(c) 2008 EURAC, Institute of Genetic Medicine
use strict;
use warnings;
use locale;
use Getopt::Std;
# Command line options: -n selects numeric sorting of the column labels,
# -h requests the usage summary.
my %flags;
getopts("nh", \%flags);
# Column separator, used both to split the input rows and to join the output
# rows. Defaults to TAB unless the TBLSEP environment variable holds a true
# value.
my $SEP = ($ENV{'TBLSEP'} || "\t");
# The first positional argument is the input file; any remaining arguments
# are column labels giving the requested column order.
my ($file, @ord) = @ARGV;
# Test for a missing or empty file name explicitly: a plain truth test would
# also reject a file that is legitimately named "0".
if(!defined($file) || $file eq '' || defined($flags{'h'}))
{
    print(STDERR qq{$0 [-nh] file [columns order]:
Sort columns of a CSV file, alphabetically, numerically, or manually.
CSV files are TAB separated, containing column labels on the first row.
You can change the column separator by setting the TBLSEP environment variable.
-n: sort numerically
-h: help summary
});
    exit(2);
}
# find(@array, $elem): position of the first element of @array that is
# string-equal to $elem, or -1 when no element matches.
# The (\@$) prototype passes the array by reference, so callers simply write
# find(@list, $value).
sub find(\@$)
{
    my ($list, $wanted) = @_;
    my $idx = 0;
    foreach my $item(@{$list})
    {
        return $idx if($item eq $wanted);
        $idx++;
    }
    return -1;
}
# genmap(\@cols, \@ord): compute the column permutation.
#   $cols: reference to the list of column labels, in file order
#   $ord:  reference to the list of labels requested on the command line
#          (may be empty)
# Returns a list @map such that $map[$new_position] is the position, in the
# input, of the column to print at $new_position.
# With a non-empty $ord the requested labels come first, in the given order,
# followed by the remaining columns in their original relative order.
# With an empty $ord the columns are sorted by label: as strings, or, when
# the -n flag is set, numerically for pairs of all-digit labels.
# Dies if $ord names a label that does not exist, or names a label more
# often than it occurs in $cols.
# Positions, not labels, are sorted and assigned, so a header containing the
# same label more than once keeps every one of its columns.
sub genmap($$)
{
    my ($cols, $ord) = @_;
    my @map;
    if(@{$ord})
    {
        # for every label, the queue of input positions carrying it
        my %pos;
        foreach my $p(0 .. $#{$cols})
        {
            push(@{$pos{$cols->[$p]}}, $p);
        }
        my @taken;
        foreach my $c(@{$ord})
        {
            # unknown label, or label requested more often than it occurs
            if(!$pos{$c} || !@{$pos{$c}})
            {
                die("too many [unknown] columns given on the command line");
            }
            my $p = shift(@{$pos{$c}});
            $taken[$p] = 1;
            push(@map, $p);
        }
        # columns not named on the command line follow, in file order
        foreach my $p(0 .. $#{$cols})
        {
            push(@map, $p) if(!$taken[$p]);
        }
    }
    else
    {
        my @idx = (0 .. $#{$cols});
        # Ties are broken on the input position so that equal labels keep
        # their original relative order.
        if(defined($flags{'n'}))
        {
            # NOTE(review): comparing all-digit pairs numerically and every
            # other pair as strings is not a transitive order for some mixed
            # label sets (e.g. "9", "10", "1a"); kept as is so that existing
            # outputs do not change.
            @map = sort({(($cols->[$a] =~ /^\d+$/ && $cols->[$b] =~ /^\d+$/ ?
                $cols->[$a] <=> $cols->[$b] : $cols->[$a] cmp $cols->[$b])
                || ($a <=> $b))} @idx);
        }
        else
        {
            @map = sort({(($cols->[$a] cmp $cols->[$b]) || ($a <=> $b))} @idx);
        }
    }
    return @map;
}
# open the file and read columns
# Three-argument open on a lexical handle keeps the file name from being
# parsed for mode or pipe characters; "-" is still honoured as standard
# input, as it was with the two-argument form.
my $fd;
if($file eq '-')
{
    $fd = \*STDIN;
}
else
{
    open($fd, '<', $file) or die("cannot open $file: $!\n");
}
# The separator is literal text (join below uses it verbatim), so quote it
# before giving it to split, which would otherwise take it as a regex.
my $sep_re = qr/\Q$SEP\E/;
my $head = <$fd>;
# an empty file has no header row: handle it as a header without columns
$head = "" if(!defined($head));
$head =~ s/[\r\n]+$//;
# limit -1 keeps trailing empty fields
my @cols = split($sep_re, $head, -1);
# $map[$n] is the input position of the column printed at position $n
my @map = genmap(\@cols, \@ord);
# remap (labels)
print(join($SEP, @cols[@map]) . "\n");
# remap (contents)
while(my $line = <$fd>)
{
    $line =~ s/[\r\n]+$//;
    my @data = split($sep_re, $line, -1);
    # every row must have exactly as many fields as the header
    die("line error at $file:$.:$#data") if($#data != $#cols);
    print(join($SEP, @data[@map]) . "\n");
}
close($fd);