-
Notifications
You must be signed in to change notification settings - Fork 2
/
check_mapping
executable file
·109 lines (81 loc) · 2.88 KB
/
check_mapping
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/env perl -w
use strict;
use Text::CSV;
use DateTime;
use FileHandle;
use Getopt::Long;
use Pod::Usage;
use Carp;
my ($IN_SEPARATOR, $OUT_SEPARATOR) = (',', ',');
my ($HELP, $MAN);
GetOptions(
'help|?' => \$HELP,
'man' => \$MAN,
'in-separator=s' => \$IN_SEPARATOR,
'out-separator=s' => \$OUT_SEPARATOR
);
pod2usage(-exitstatus => 0, -verbose => 1) if $HELP;
pod2usage(-exitstatus => 0, -verbose => 2) if $MAN;
my ($SUPERSET_KEY, $SUPERSET_FILE, $SUBSET_KEY, $SUBSET_FILE) = @ARGV;
pod2usage(-exitstatus => 1, -verbose => 1) unless ($SUBSET_KEY);
my $CSV_IN = Text::CSV->new({ binary => 1, sep_char => $IN_SEPARATOR });
my $CSV_OUT = Text::CSV->new({ binary => 1, sep_char => $OUT_SEPARATOR });
sub main() {
my %superset_keys = ();
process_csv($SUPERSET_FILE, \&build_superset_keys,
[ $SUPERSET_KEY, \%superset_keys ]);
process_csv($SUBSET_FILE, \&dump_bad_subset_line,
[ $SUBSET_KEY, \%superset_keys ]);
}
sub process_csv($$$) {
my ($file, $sub, $params) = @_;
my $fh = $file
? new FileHandle($file, 'r')
: *STDIN;
croak "Cannot open '$file' for reading: $!" unless $fh;
my @headers = @{ $CSV_IN->getline($fh) or croak "Cannot parse header line: $!" };
$CSV_IN->column_names(@headers); # Set column names for getline_hr ()
until ($CSV_IN->eof) {
my $hash = $CSV_IN->getline_hr($fh) or next;
$sub->($hash, @$params);
}
close $fh if $file;
}
sub build_superset_keys($$$) {
my ($hash, $key, $keys) = @_;
$key = '' unless defined $key;
my $value = defined $hash->{$key} ? $hash->{$key} : '';
$keys->{$value} = 1;
}
sub dump_bad_subset_line($$$$) {
my ($hash, $key, $sup_keys) = @_;
die unless $key;
warn "undefined hash->{$key}" unless defined $hash->{$key};
unless (defined $sup_keys->{$hash->{$key}}) {
printf("%s (line $.)\n", $hash->{$key});
}
}
main();
__END__
=head1 NAME
check_mapping - referencial integrity checking tool. List values and line numbers
of the input CSV that violate the specified referencial integrity
constrain.
=head1 SYNOPSIS
check_mapping [options] pk_column_name referenced_csv_file \
fk_column_name processed_csv_file
check_mapping [options] fk_column_name referenced_csv_file \
pk_column_name < processed_csv_file
Example:
#
# list 'Employee ID' values in salaries.csv that don't match a value of
# 'ID' in employees.csv
#
check_mapping salaries.csv 'Employee ID' employees.csv 'ID'
=head1 OPTIONS
--help, -h short help screen
--man slightly more detailed documentation
--in-separator=char field separator in the source file ("," by default)
--out-separator=char output field separator ("," by default)
=head1 AUTHORS
Pavel Kolesnikov <[email protected]>