Skip to content

Commit

Permalink
Suggested improvements to overflow code, including moving slicer to B…
Browse files Browse the repository at this point in the history
…aseAdaptor
  • Loading branch information
ens-bwalts committed Apr 3, 2019
1 parent 1beb0a5 commit d657f9d
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 46 deletions.
1 change: 0 additions & 1 deletion modules/Bio/EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ sub default_table_name {
sub overflow_limit {
return {
'key_signature' => 255,
'struct_name' => 255,
};
}

Expand Down
74 changes: 73 additions & 1 deletion modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ use strict;
use warnings;
no strict 'refs'; # needed to allow AUTOLOAD create new methods
use DBI 1.6; # the 1.6 functionality is important for detecting autoincrement fields and other magic.
use Scalar::Util ('blessed');

use Bio::EnsEMBL::Hive::Utils ('stringify', 'throw');

Expand Down Expand Up @@ -138,6 +139,26 @@ sub overflow_limit {
return $self->{_overflow_limit} || $self->default_overflow_limit();
}

=head2 size_limit
Arg[1] : (optional) Hashref with column names as keys and column size as values
Description : Getter/setter for column size limits for the implementing adaptor.
If no size limit is set, this method will call _table_info_loader() to
get sizes from the database schema.
Returntype : Hashref
=cut

sub size_limit {
my $self = shift @_;

if(@_) { # setter
$self->{_size_limit} = shift @_;
} elsif ( !defined( $self->{_size_limit} ) ) {
$self->_table_info_loader();
}
return $self->{_size_limit};
}

sub input_column_mapping {
my $self = shift @_;
Expand Down Expand Up @@ -229,17 +250,19 @@ sub _table_info_loader {
my $table_name = $self->table_name();

my %column_set = ();
my %size_limit = ();
my $autoinc_id = '';
my @primary_key = $dbh->primary_key(undef, undef, $table_name);

my $sth = $dbh->column_info(undef, undef, $table_name, '%');
$sth->execute();
while (my $row = $sth->fetchrow_hashref()) {
my ( $column_name, $column_type ) = @$row{'COLUMN_NAME', 'TYPE_NAME'};
my ( $column_name, $column_type, $size_limit ) = @$row{'COLUMN_NAME', 'TYPE_NAME', 'COLUMN_SIZE'};

# warn "ColumnInfo [$table_name/$column_name] = $column_type\n";

$column_set{$column_name} = $column_type;
$size_limit{$column_name} = $size_limit;

if( ($column_name eq $table_name.'_id')
or ($table_name eq 'analysis_base' and $column_name eq 'analysis_id') ) { # a special case (historical)
Expand All @@ -249,6 +272,7 @@ sub _table_info_loader {
$sth->finish;

$self->column_set( \%column_set );
$self->size_limit( \%size_limit );
$self->primary_key( \@primary_key );
$self->autoinc_id( $autoinc_id );
}
Expand Down Expand Up @@ -552,6 +576,54 @@ sub check_and_dereference_analysis_data {
}
}

sub slicer {
my ($self, $sliceable, $fields) = @_;

my $is_object = blessed($sliceable) ? 1 : 0;

my $autoinc_id;
if ($is_object) {
$autoinc_id = $self->autoinc_id();
}
my $overflow_limit = $self->overflow_limit();
my $size_limit = $self->size_limit();

my @slice;
# keep track of any values needing overflow, so that overflow can
# be deferred until after checking all fields for size limit violations
my %needs_overflow;

for (my $i = 0; $i <= $#{$fields}; $i++) {
my $field = $fields->[$i];
my $value = $is_object ? $sliceable->$field() : $sliceable->{$field};
my $ol = $overflow_limit->{$field};
my $sl = $size_limit->{$field};

if ($is_object && $field eq $autoinc_id) {
$slice[$i] = $sliceable->dbID();
} elsif (defined($ol) and defined($value) and (length($value) > $ol)) {
# if overflow limit exists for this field, we can ignore
# any size limit since an excessively large field will be
# handled through overflow
$needs_overflow{$i} = $value;
} elsif (defined($sl) and defined($value) and (length($value) > $sl)) {
# if no overflow limit, then check size and generate a meaningful error
# as some RDBMS implementations fail silently or misleadingly when trying
# to store an oversize value
throw("length of value for column \"" . $field .
"\" exceeds the maximum size, which is $sl");
} else {
$slice[$i] = $value;
}
}

foreach my $fields_index (keys(%needs_overflow)) {
$slice[$fields_index] =
$self->db->get_AnalysisDataAdaptor()->store_if_needed($needs_overflow{$fields_index});
}

return \@slice;
}

sub DESTROY { } # to simplify AUTOLOAD

Expand Down
18 changes: 0 additions & 18 deletions modules/Bio/EnsEMBL/Hive/DBSQL/NakedTableAdaptor.pm
Original file line number Diff line number Diff line change
Expand Up @@ -44,24 +44,6 @@ use Bio::EnsEMBL::Hive::NakedTable;
use base ('Bio::EnsEMBL::Hive::DBSQL::BaseAdaptor');


sub slicer { # take a slice of the hashref (if only we could inline in Perl!)
my ($self, $hashref, $fields) = @_;

my $overflow_limit = $self->overflow_limit();

return [ map { eval { my $value = $hashref->{$_};
my $ol = $overflow_limit->{$_};
if (defined($ol) and defined($value) and (length($value) > $ol)) {
$self->db->get_AnalysisDataAdaptor()->store_if_needed($value);
} else {
$value;
}
}

} @$fields ];
}


sub objectify { # pretend the hashref becomes an object (if only we could inline in Perl!)
return pop @_;
}
Expand Down
19 changes: 0 additions & 19 deletions modules/Bio/EnsEMBL/Hive/DBSQL/ObjectAdaptor.pm
Original file line number Diff line number Diff line change
Expand Up @@ -47,25 +47,6 @@ sub object_class {
die "Please define object_class() in your specific adaptor class to return the class name of your intended object";
}


sub slicer { # take a slice of the object (if only we could inline in Perl!)
my ($self, $object, $fields) = @_;

my $autoinc_id = $self->autoinc_id();
my $overflow_limit = $self->overflow_limit();

return [ map { ($_ eq $autoinc_id)
? $object->dbID()
: eval { my $value = $object->$_();
my $ol = $overflow_limit->{$_};
(defined($ol) and defined($value) and length($value)>$ol)
? $self->db->get_AnalysisDataAdaptor()->store_if_needed( $value )
: $value
}
} @$fields ];
}


sub objectify { # turn the hashref into an object (if only we could inline in Perl!)
my ($self, $hashref) = @_;

Expand Down
35 changes: 28 additions & 7 deletions t/02.api/overflow.t
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env perl
# Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
# Copyright [2016-2018] EMBL-European Bioinformatics Institute
# Copyright [2016-2019] EMBL-European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -21,7 +21,8 @@ use warnings;
use Data::Dumper;
use File::Temp qw{tempdir};

use Test::More tests => 18;
use Test::More tests => 20;
use Test::Exception;

use Bio::EnsEMBL::Hive::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Hive::ResourceClass;
Expand Down Expand Up @@ -145,12 +146,32 @@ my $new_accu = Bio::EnsEMBL::Hive::Accumulator->new(
my $long_key_signature = 'ks' x 129;
my $long_output_id = [ { 'key' => $long_key_signature,
$long_struct_name => 1, } ];
$new_accu->dataflow(
$long_output_id,
$accu_fan_job,

throws_ok { $new_accu->dataflow(
$long_output_id,
$accu_fan_job,)
} qr/length of value/,
"exception to insert of oversize struct name";

is($ada_a->count_all(), 5, "Oversize struct name not inserted");

my $short_struct_name = 'ssn';

my $sslk_accu = Bio::EnsEMBL::Hive::Accumulator->new(
adaptor => $acu_a,
struct_name => $short_struct_name,
signature_template => '{key}',
);

is($ada_a->count_all(), 7, "Overflow for long struct_name and key_signature in accu");
my $sslk_output_id = [ { 'key' => $long_key_signature,
'ssn' => 1, } ];

$sslk_accu->dataflow(
$sslk_output_id,
$accu_fan_job,);

is($ada_a->count_all(), 6, "Oversize key signature overflow");


# Test retrieval of overflow data

Expand All @@ -177,7 +198,7 @@ my $fetched_accu_hash = $fetched_accu_structures->{$accu_funnel_job->dbID};
my $fetched_struct_name = (keys(%$fetched_accu_hash))[0];
my $fetched_key_signature = (keys(%{$fetched_accu_hash->{$fetched_struct_name}}))[0];

is ($fetched_struct_name, $long_struct_name, "fetched long struct_name from accu");
is ($fetched_struct_name, $short_struct_name, "fetched short struct_name from accu");
is ($fetched_key_signature, $long_key_signature, "fetched long key_signature from accu");

done_testing();

0 comments on commit d657f9d

Please sign in to comment.