-
Notifications
You must be signed in to change notification settings - Fork 28
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
enable overflow for accu keys, to_analysis_urls, and submission_cmd_args #40
base: version/2.2
Are you sure you want to change the base?
Changes from all commits
1e85f0b
48b3e58
1beb0a5
d657f9d
8beee26
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,6 +40,7 @@ use strict; | |
use warnings; | ||
no strict 'refs'; # needed to allow AUTOLOAD create new methods | ||
use DBI 1.6; # the 1.6 functionality is important for detecting autoincrement fields and other magic. | ||
use Scalar::Util ('blessed'); | ||
|
||
use Bio::EnsEMBL::Hive::Utils ('stringify', 'throw'); | ||
|
||
|
@@ -138,6 +139,26 @@ sub overflow_limit { | |
return $self->{_overflow_limit} || $self->default_overflow_limit(); | ||
} | ||
|
||
=head2 size_limit | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The paradigm of BaseAdaptor is to have a |
||
|
||
Arg[1] : (optional) Hashref with column names as keys and column size as values | ||
Description : Getter/setter for column size limits for the implementing adaptor. | ||
If no size limit is set, this method will call _table_info_loader() to | ||
get sizes from the database schema. | ||
Returntype : Hashref | ||
|
||
=cut | ||
|
||
sub size_limit { | ||
my $self = shift @_; | ||
|
||
# Any argument (even an explicit undef) is stored as the new limits hashref.
if(@_) { # setter | ||
$self->{_size_limit} = shift @_; | ||
} elsif ( !defined( $self->{_size_limit} ) ) { | ||
# Nothing cached yet: _table_info_loader() reads the column sizes from the schema
# and hands them back to this method as a setter, which fills $self->{_size_limit}.
$self->_table_info_loader(); | ||
} | ||
return $self->{_size_limit}; | ||
} | ||
|
||
sub input_column_mapping { | ||
my $self = shift @_; | ||
|
@@ -229,18 +250,27 @@ sub _table_info_loader { | |
my $table_name = $self->table_name(); | ||
|
||
my %column_set = (); | ||
my %size_limit = (); | ||
my $autoinc_id = ''; | ||
my @primary_key = $dbh->primary_key(undef, undef, $table_name); | ||
|
||
my $sth = $dbh->column_info(undef, undef, $table_name, '%'); | ||
$sth->execute(); | ||
while (my $row = $sth->fetchrow_hashref()) { | ||
my ( $column_name, $column_type ) = @$row{'COLUMN_NAME', 'TYPE_NAME'}; | ||
my ( $column_name, $column_type, $size_limit ) = @$row{'COLUMN_NAME', 'TYPE_NAME', 'COLUMN_SIZE'}; | ||
|
||
# warn "ColumnInfo [$table_name/$column_name] = $column_type\n"; | ||
|
||
$column_set{$column_name} = $column_type; | ||
|
||
# PostgreSQL reports a COLUMN_SIZE of 4 for enums, which is not compatible with | ||
# the way slicer does column size checking. Likewise, PostgreSQL reports | ||
# a user-defined TYPE_NAME for enums, rather than 'enum'. Therefore, if | ||
# the DB is PostgreSQL, only set size_limit for varchars | ||
unless (($driver eq 'pgsql') && !($column_type eq 'character varying')) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe a matter of taste, but I find an if (($driver ne 'pgsql') || ($column_type eq 'character varying')) { |
||
$size_limit{$column_name} = $size_limit; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This will set a size limit for all column types, incl. integers, enums and timestamps, but I'm not sure these are correctly enforceable:
I would suggest to only populate |
||
} | ||
|
||
if( ($column_name eq $table_name.'_id') | ||
or ($table_name eq 'analysis_base' and $column_name eq 'analysis_id') ) { # a special case (historical) | ||
$autoinc_id = $column_name; | ||
|
@@ -249,6 +279,7 @@ sub _table_info_loader { | |
$sth->finish; | ||
|
||
$self->column_set( \%column_set ); | ||
$self->size_limit( \%size_limit ); | ||
$self->primary_key( \@primary_key ); | ||
$self->autoinc_id( $autoinc_id ); | ||
} | ||
|
@@ -303,9 +334,7 @@ sub fetch_all { | |
while(my $hashref = $sth->fetchrow_hashref) { | ||
|
||
foreach my $overflow_key (@overflow_columns) { | ||
if($hashref->{$overflow_key} =~ /^_ext(?:\w+)_data_id (\d+)$/) { | ||
$hashref->{$overflow_key} = $overflow_adaptor->fetch_by_analysis_data_id_TO_data($1); | ||
} | ||
$hashref->{$overflow_key} = $self->check_and_dereference_analysis_data($hashref->{$overflow_key}); | ||
} | ||
|
||
my $pptr = \$result_struct; | ||
|
@@ -530,6 +559,79 @@ sub store { | |
} | ||
|
||
|
||
=head2 check_and_dereference_analysis_data | ||
|
||
Arg [1] : string data that may reference an analysis_data_id | ||
Usage : my $value = $self->check_and_dereference_analysis_data($$fetched_row[0]) | ||
Description : Checks to see if the passed value matches the regular expression | ||
: /^_ext(?:\w+)_data_id (\d+)$/ (e.g. "_extended_data_id 3", a pointer to an analysis_data_id). | ||
: If so, it returns the entry from the "data" column in analysis_data from the | ||
: row containing the given analysis_data_id. | ||
: If not, it returns the passed parameter unchanged. | ||
Returntype : string | ||
|
||
=cut | ||
|
||
sub check_and_dereference_analysis_data { | ||
my $self = shift @_; | ||
my $possible_analysis_data_id_ref = shift @_; | ||
|
||
# NOTE(review): the value is matched without a defined() check, so an undef argument
# (presumably what a NULL column yields - confirm against callers) triggers an
# "uninitialized value" warning under "use warnings" before it is returned unchanged.
if ($possible_analysis_data_id_ref =~ /^_ext(?:\w+)_data_id (\d+)$/) { | ||
# $1 is the numeric id captured from the reference string; the stored data replaces the reference.
return $self->db->get_AnalysisDataAdaptor->fetch_by_analysis_data_id_TO_data($1); | ||
} else { | ||
# Not a reference: hand the value back as it was passed in.
return $possible_analysis_data_id_ref; | ||
} | ||
} | ||
|
||
# slicer($sliceable, $fields)
#
# Builds and returns an arrayref holding one value per entry of @$fields, in the same order.
# $sliceable may be a blessed object (each field is read by calling the method of that name)
# or an unblessed hashref (each field is read by key).
#
# For each field:
#   * on an object, the field equal to autoinc_id() is filled from $sliceable->dbID();
#   * a defined value longer than the field's overflow_limit is replaced by whatever
#     AnalysisDataAdaptor->store_if_needed() returns for it;
#   * otherwise, a defined value longer than the field's size_limit is reported via throw();
#   * anything else (including undef) is copied through unchanged.
#
# Overflow storage is deferred until every field has been checked, so nothing is stored
# when a size violation is reported (assuming throw() dies - TODO confirm).
sub slicer { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like this |
||
my ($self, $sliceable, $fields) = @_; | ||
|
||
my $is_object = blessed($sliceable) ? 1 : 0; | ||
|
||
# autoinc_id is only looked up (and only compared against) when slicing an object.
my $autoinc_id; | ||
if ($is_object) { | ||
$autoinc_id = $self->autoinc_id(); | ||
} | ||
my $overflow_limit = $self->overflow_limit(); | ||
my $size_limit = $self->size_limit(); | ||
|
||
my @slice; | ||
# keep track of any values needing overflow, so that overflow can | ||
# be deferred until after checking all fields for size limit violations | ||
my %needs_overflow; | ||
|
||
for (my $i = 0; $i <= $#{$fields}; $i++) { | ||
my $field = $fields->[$i]; | ||
# The accessor is called for every field, including the autoinc one whose result is then unused.
my $value = $is_object ? $sliceable->$field() : $sliceable->{$field}; | ||
my $ol = $overflow_limit->{$field}; | ||
my $sl = $size_limit->{$field}; | ||
|
||
# NOTE(review): length() counts characters; if the schema limits are in bytes,
# multi-byte values could pass these checks and still be too large - confirm.
if ($is_object && $field eq $autoinc_id) { | ||
$slice[$i] = $sliceable->dbID(); | ||
} elsif (defined($ol) and defined($value) and (length($value) > $ol)) { | ||
# if overflow limit exists for this field, we can ignore | ||
# any size limit since an excessively large field will be | ||
# handled through overflow | ||
$needs_overflow{$i} = $value; | ||
} elsif (defined($sl) and defined($value) and (length($value) > $sl)) { | ||
# if no overflow limit, then check size and generate a meaningful error | ||
# as some RDBMS implementations fail silently or misleadingly when trying | ||
# to store an oversize value | ||
throw("length of value for column \"" . $field . | ||
"\" exceeds the maximum size, which is $sl"); | ||
} else { | ||
$slice[$i] = $value; | ||
} | ||
} | ||
|
||
# Hash key order is unspecified, but each result is written to its own index, so order does not matter.
foreach my $fields_index (keys(%needs_overflow)) { | ||
$slice[$fields_index] = | ||
$self->db->get_AnalysisDataAdaptor()->store_if_needed($needs_overflow{$fields_index}); | ||
} | ||
|
||
return \@slice; | ||
} | ||
|
||
sub DESTROY { } # to simplify AUTOLOAD | ||
|
||
sub AUTOLOAD { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's odd to me how many times you overload the overflow_limit sub.