From d657f9dd70abc915e797f205493cc488420796be Mon Sep 17 00:00:00 2001 From: ens-bwalts Date: Wed, 3 Apr 2019 09:38:41 +0100 Subject: [PATCH] Suggested improvements to overflow code, including moving slicer to BaseAdaptor --- .../EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm | 1 - modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm | 74 ++++++++++++++++++- .../EnsEMBL/Hive/DBSQL/NakedTableAdaptor.pm | 18 ----- .../Bio/EnsEMBL/Hive/DBSQL/ObjectAdaptor.pm | 19 ----- t/02.api/overflow.t | 35 +++++++-- 5 files changed, 101 insertions(+), 46 deletions(-) diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm index c342d6658..3d41b5a4b 100644 --- a/modules/Bio/EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm +++ b/modules/Bio/EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm @@ -51,7 +51,6 @@ sub default_table_name { sub overflow_limit { return { 'key_signature' => 255, - 'struct_name' => 255, }; } diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm index c991e79fb..ff59e5bb9 100644 --- a/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm +++ b/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm @@ -40,6 +40,7 @@ use strict; use warnings; no strict 'refs'; # needed to allow AUTOLOAD create new methods use DBI 1.6; # the 1.6 functionality is important for detecting autoincrement fields and other magic. +use Scalar::Util ('blessed'); use Bio::EnsEMBL::Hive::Utils ('stringify', 'throw'); @@ -138,6 +139,26 @@ sub overflow_limit { return $self->{_overflow_limit} || $self->default_overflow_limit(); } +=head2 size_limit + +Arg[1] : (optional) Hashref with column names as keys and column size as values +Description : Getter/setter for column size limits for the implementing adaptor. + If no size limit is set, this method will call _table_info_loader() to + get sizes from the database schema. +Returntype : Hashref + +=cut + +sub size_limit { + my $self = shift @_; + + if(@_) { # setter + $self->{_size_limit} = shift @_; + } elsif ( !defined( $self->{_size_limit} ) ) { + $self->_table_info_loader(); + } + return $self->{_size_limit}; +} sub input_column_mapping { my $self = shift @_; @@ -229,17 +250,19 @@ sub _table_info_loader { my $table_name = $self->table_name(); my %column_set = (); + my %size_limit = (); my $autoinc_id = ''; my @primary_key = $dbh->primary_key(undef, undef, $table_name); my $sth = $dbh->column_info(undef, undef, $table_name, '%'); $sth->execute(); while (my $row = $sth->fetchrow_hashref()) { - my ( $column_name, $column_type ) = @$row{'COLUMN_NAME', 'TYPE_NAME'}; + my ( $column_name, $column_type, $size_limit ) = @$row{'COLUMN_NAME', 'TYPE_NAME', 'COLUMN_SIZE'}; # warn "ColumnInfo [$table_name/$column_name] = $column_type\n"; $column_set{$column_name} = $column_type; + $size_limit{$column_name} = $size_limit; if( ($column_name eq $table_name.'_id') or ($table_name eq 'analysis_base' and $column_name eq 'analysis_id') ) { # a special case (historical) @@ -249,6 +272,7 @@ sub _table_info_loader { $sth->finish; $self->column_set( \%column_set ); + $self->size_limit( \%size_limit ); $self->primary_key( \@primary_key ); $self->autoinc_id( $autoinc_id ); } @@ -552,6 +576,54 @@ sub check_and_dereference_analysis_data { } } +sub slicer { + my ($self, $sliceable, $fields) = @_; + + my $is_object = blessed($sliceable) ? 1 : 0; + + my $autoinc_id; + if ($is_object) { + $autoinc_id = $self->autoinc_id(); + } + my $overflow_limit = $self->overflow_limit(); + my $size_limit = $self->size_limit(); + + my @slice; + # keep track of any values needing overflow, so that overflow can + # be deferred until after checking all fields for size limit violations + my %needs_overflow; + + for (my $i = 0; $i <= $#{$fields}; $i++) { + my $field = $fields->[$i]; + my $value = $is_object ? $sliceable->$field() : $sliceable->{$field}; + my $ol = $overflow_limit->{$field}; + my $sl = $size_limit->{$field}; + + if ($is_object && $field eq $autoinc_id) { + $slice[$i] = $sliceable->dbID(); + } elsif (defined($ol) and defined($value) and (length($value) > $ol)) { + # if overflow limit exists for this field, we can ignore + # any size limit since an excessively large field will be + # handled through overflow + $needs_overflow{$i} = $value; + } elsif (defined($sl) and defined($value) and (length($value) > $sl)) { + # if no overflow limit, then check size and generate a meaningful error + # as some RDBMS implementations fail silently or misleadingly when trying + # to store an oversize value + throw("length of value for column \"" . $field . + "\" exceeds the maximum size, which is $sl"); + } else { + $slice[$i] = $value; + } + } + + foreach my $fields_index (keys(%needs_overflow)) { + $slice[$fields_index] = + $self->db->get_AnalysisDataAdaptor()->store_if_needed($needs_overflow{$fields_index}); + } + + return \@slice; +} sub DESTROY { } # to simplify AUTOLOAD diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/NakedTableAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/NakedTableAdaptor.pm index 48e602d74..307512803 100644 --- a/modules/Bio/EnsEMBL/Hive/DBSQL/NakedTableAdaptor.pm +++ b/modules/Bio/EnsEMBL/Hive/DBSQL/NakedTableAdaptor.pm @@ -44,24 +44,6 @@ use Bio::EnsEMBL::Hive::NakedTable; use base ('Bio::EnsEMBL::Hive::DBSQL::BaseAdaptor'); -sub slicer { # take a slice of the hashref (if only we could inline in Perl!) - my ($self, $hashref, $fields) = @_; - - my $overflow_limit = $self->overflow_limit(); - - return [ map { eval { my $value = $hashref->{$_}; - my $ol = $overflow_limit->{$_}; - if (defined($ol) and defined($value) and (length($value) > $ol)) { - $self->db->get_AnalysisDataAdaptor()->store_if_needed($value); - } else { - $value; - } - } - - } @$fields ]; -} - - sub objectify { # pretend the hashref becomes an object (if only we could inline in Perl!) return pop @_; } diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/ObjectAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/ObjectAdaptor.pm index b3126e86f..f833f47bf 100644 --- a/modules/Bio/EnsEMBL/Hive/DBSQL/ObjectAdaptor.pm +++ b/modules/Bio/EnsEMBL/Hive/DBSQL/ObjectAdaptor.pm @@ -47,25 +47,6 @@ sub object_class { die "Please define object_class() in your specific adaptor class to return the class name of your intended object"; } - -sub slicer { # take a slice of the object (if only we could inline in Perl!) - my ($self, $object, $fields) = @_; - - my $autoinc_id = $self->autoinc_id(); - my $overflow_limit = $self->overflow_limit(); - - return [ map { ($_ eq $autoinc_id) - ? $object->dbID() - : eval { my $value = $object->$_(); - my $ol = $overflow_limit->{$_}; - (defined($ol) and defined($value) and length($value)>$ol) - ? $self->db->get_AnalysisDataAdaptor()->store_if_needed( $value ) - : $value - } - } @$fields ]; -} - - sub objectify { # turn the hashref into an object (if only we could inline in Perl!) my ($self, $hashref) = @_; diff --git a/t/02.api/overflow.t b/t/02.api/overflow.t index 6b041b238..259e96541 100755 --- a/t/02.api/overflow.t +++ b/t/02.api/overflow.t @@ -1,6 +1,6 @@ #!/usr/bin/env perl # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute -# Copyright [2016-2018] EMBL-European Bioinformatics Institute +# Copyright [2016-2019] EMBL-European Bioinformatics Institute # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -21,7 +21,8 @@ use warnings; use Data::Dumper; use File::Temp qw{tempdir}; -use Test::More tests => 18; +use Test::More tests => 20; +use Test::Exception; use Bio::EnsEMBL::Hive::DBSQL::DBAdaptor; use Bio::EnsEMBL::Hive::ResourceClass; @@ -145,12 +146,32 @@ my $new_accu = Bio::EnsEMBL::Hive::Accumulator->new( my $long_key_signature = 'ks' x 129; my $long_output_id = [ { 'key' => $long_key_signature, $long_struct_name => 1, } ]; -$new_accu->dataflow( - $long_output_id, - $accu_fan_job, + +throws_ok { $new_accu->dataflow( + $long_output_id, + $accu_fan_job,) + } qr/length of value/, + "exception to insert of oversize struct name"; + +is($ada_a->count_all(), 5, "Oversize struct name not inserted"); + +my $short_struct_name = 'ssn'; + +my $sslk_accu = Bio::EnsEMBL::Hive::Accumulator->new( + adaptor => $acu_a, + struct_name => $short_struct_name, + signature_template => '{key}', ); -is($ada_a->count_all(), 7, "Overflow for long struct_name and key_signature in accu"); +my $sslk_output_id = [ { 'key' => $long_key_signature, + 'ssn' => 1, } ]; + +$sslk_accu->dataflow( + $sslk_output_id, + $accu_fan_job,); + +is($ada_a->count_all(), 6, "Oversize key signature overflow"); + # Test retrieval of overflow data @@ -177,7 +198,7 @@ my $fetched_accu_hash = $fetched_accu_structures->{$accu_funnel_job->dbID}; my $fetched_struct_name = (keys(%$fetched_accu_hash))[0]; my $fetched_key_signature = (keys(%{$fetched_accu_hash->{$fetched_struct_name}}))[0]; -is ($fetched_struct_name, $long_struct_name, "fetched long struct_name from accu"); +is ($fetched_struct_name, $short_struct_name, "fetched short struct_name from accu"); is ($fetched_key_signature, $long_key_signature, "fetched long key_signature from accu"); done_testing();