From 1e85f0bcfa55403e4ef31a016e200e2aeed990b4 Mon Sep 17 00:00:00 2001
From: ens-bwalts <bwalts@ebi.ac.uk>
Date: Tue, 24 Jul 2018 17:30:29 +0100
Subject: [PATCH 1/5] enable overflow for accu keys, to_analysis_urls, and
 submission_cmd_args

---
 .../EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm  | 29 +++++++++++++++++++
 modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm | 25 ++++++++++++++++
 .../EnsEMBL/Hive/DBSQL/DataflowRuleAdaptor.pm |  7 +++++
 .../Bio/EnsEMBL/Hive/DBSQL/ObjectAdaptor.pm   |  2 +-
 .../Hive/DBSQL/ResourceDescriptionAdaptor.pm  |  7 +++++
 5 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm
index cbc67d808..461bb6b9e 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm
@@ -48,6 +48,32 @@ sub default_table_name {
 }
 
 
+sub slicer {    # take a slice of the hashref (if only we could inline in Perl!)
+    my ($self, $hashref, $fields) = @_;
+
+    my $overflow_limit = $self->overflow_limit();
+
+    return [ map { eval { my $value = $hashref->{$_};
+                          my $ol = $overflow_limit->{$_};
+                          if (defined($ol) and defined($value) and  (length($value) > $ol)) {
+                              $self->db->get_AnalysisDataAdaptor()->store_if_needed($value);
+                          } else {
+                              $value;
+                          }
+                   }
+
+
+             } @$fields ];
+}
+
+sub overflow_limit {
+    return {
+        'key_signature' => 255,
+        'struct_name'   => 255,
+    };
+}
+
+
 sub fetch_structures_for_job_ids {
     my ($self, $job_ids_csv, $id_scale, $id_offset) = @_;
     $id_scale   ||= 1;
@@ -63,6 +89,9 @@ sub fetch_structures_for_job_ids {
 
         ROW: while(my ($receiving_job_id, $struct_name, $key_signature, $stringified_value) = $sth->fetchrow_array() ) {
 
+            ($key_signature, $struct_name) = map {$self->check_and_dereference_analysis_data($_)}
+                ($key_signature, $struct_name);
+
             my $value = destringify($stringified_value);
 
             my $sptr = \$structures{$receiving_job_id * $id_scale + $id_offset}{$struct_name};
diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm
index 78eebb10d..81191cd5b 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm
@@ -530,6 +530,31 @@ sub store {
 }
 
 
+=head2 check_and_dereference_analysis_data
+
+   Arg [1]     : string data that may reference an analysis_data_id
+   Usage       : my $value = $self->check_and_dereference_analysis_data($$fetched_row[0])
+   Description : Checks to see if the passed value matches the regular expression
+               : /^_ext(?:\w+)_data_id (\d+)$/ (e.g. "external_data_id 3", pointer to an analysis_data_id).
+               : If so, it returns the entry from the "data" column in analysis_data from the
+               : row containing the given analysis_data_id.
+               : If not, it returns the passed parameter unchanged.
+   Returntype  : string
+
+=cut
+
+sub check_and_dereference_analysis_data {
+    my $self = shift @_;
+    my $possible_analysis_data_id_ref = shift @_;
+
+    if ($possible_analysis_data_id_ref  =~ /^_ext(?:\w+)_data_id (\d+)$/) {
+        return $self->db->get_AnalysisDataAdaptor->fetch_by_analysis_data_id_TO_data($1);
+    } else {
+        return $possible_analysis_data_id_ref;
+    }
+}
+
+
 sub DESTROY { }   # to simplify AUTOLOAD
 
 sub AUTOLOAD {
diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/DataflowRuleAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/DataflowRuleAdaptor.pm
index 28b77077e..9d9526441 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/DataflowRuleAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/DataflowRuleAdaptor.pm
@@ -55,6 +55,13 @@ sub default_insertion_method {
 }
 
 
+sub overflow_limit {
+    return {
+        'to_analysis_url' => 255,
+    };
+}
+
+
 sub object_class {
     return 'Bio::EnsEMBL::Hive::DataflowRule';
 }
diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/ObjectAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/ObjectAdaptor.pm
index cabdfd7f4..b3126e86f 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/ObjectAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/ObjectAdaptor.pm
@@ -58,7 +58,7 @@ sub slicer {    # take a slice of the object (if only we could inline in Perl!)
                     ? $object->dbID()
                     : eval { my $value  = $object->$_();
                              my $ol     = $overflow_limit->{$_};
-                             (defined($ol) and length($value)>$ol)
+                             (defined($ol) and defined($value) and length($value)>$ol)
                                 ? $self->db->get_AnalysisDataAdaptor()->store_if_needed( $value )
                                 : $value
                       }
diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/ResourceDescriptionAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/ResourceDescriptionAdaptor.pm
index f0bc1fa96..888ad1853 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/ResourceDescriptionAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/ResourceDescriptionAdaptor.pm
@@ -55,6 +55,13 @@ sub default_insertion_method {
 }
 
 
+sub overflow_limit {
+    return {
+        'submission_cmd_args' => 255,
+    };
+}
+
+
 sub object_class {
     return 'Bio::EnsEMBL::Hive::ResourceDescription';
 }

From 48b3e5836502c0f407c7eb99d3f3048878b16229 Mon Sep 17 00:00:00 2001
From: ens-bwalts <bwalts@ebi.ac.uk>
Date: Fri, 24 Aug 2018 17:54:04 +0100
Subject: [PATCH 2/5] improvements to overflow handling

---
 .../EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm  |  18 --
 .../Hive/DBSQL/AnalysisCtrlRuleAdaptor.pm     |   5 +
 .../EnsEMBL/Hive/DBSQL/AnalysisJobAdaptor.pm  |   4 +-
 modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm |   4 +-
 .../EnsEMBL/Hive/DBSQL/NakedTableAdaptor.pm   |  13 +-
 .../Hive/DBSQL/ResourceDescriptionAdaptor.pm  |   1 +
 .../fetch_and_count_by_multiple_columns.t     |  28 +--
 t/02.api/overflow.t                           | 183 ++++++++++++++++++
 8 files changed, 204 insertions(+), 52 deletions(-)
 create mode 100755 t/02.api/overflow.t

diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm
index 461bb6b9e..c342d6658 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm
@@ -48,24 +48,6 @@ sub default_table_name {
 }
 
 
-sub slicer {    # take a slice of the hashref (if only we could inline in Perl!)
-    my ($self, $hashref, $fields) = @_;
-
-    my $overflow_limit = $self->overflow_limit();
-
-    return [ map { eval { my $value = $hashref->{$_};
-                          my $ol = $overflow_limit->{$_};
-                          if (defined($ol) and defined($value) and  (length($value) > $ol)) {
-                              $self->db->get_AnalysisDataAdaptor()->store_if_needed($value);
-                          } else {
-                              $value;
-                          }
-                   }
-
-
-             } @$fields ];
-}
-
 sub overflow_limit {
     return {
         'key_signature' => 255,
diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisCtrlRuleAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisCtrlRuleAdaptor.pm
index 0e1505c97..65bcb637c 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisCtrlRuleAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisCtrlRuleAdaptor.pm
@@ -53,6 +53,11 @@ sub default_insertion_method {
     return 'INSERT_IGNORE';
 }
 
+sub overflow_limit {
+    return {
+        'condition_analysis_url' => 255,
+    };
+}
 
 sub object_class {
     return 'Bio::EnsEMBL::Hive::AnalysisCtrlRule';
diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisJobAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisJobAdaptor.pm
index 18b07a5f2..3b9368f0d 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisJobAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisJobAdaptor.pm
@@ -700,9 +700,7 @@ sub fetch_input_ids_for_job_ids {
         $sth->execute();
 
         while(my ($job_id, $input_id) = $sth->fetchrow_array() ) {
-            if($input_id =~ /^_ext(?:\w+)_data_id (\d+)$/) {
-                $input_id = $self->db->get_AnalysisDataAdaptor->fetch_by_analysis_data_id_TO_data($1);
-            }
+            $input_id = $self->check_and_dereference_analysis_data($input_id);
             $input_ids{$job_id * $id_scale + $id_offset} = $input_id;
         }
     }
diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm
index 81191cd5b..c991e79fb 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm
@@ -303,9 +303,7 @@ sub fetch_all {
     while(my $hashref = $sth->fetchrow_hashref) {
 
         foreach my $overflow_key (@overflow_columns) {
-            if($hashref->{$overflow_key} =~ /^_ext(?:\w+)_data_id (\d+)$/) {
-                $hashref->{$overflow_key} = $overflow_adaptor->fetch_by_analysis_data_id_TO_data($1);
-            }
+            $hashref->{$overflow_key} = $self->check_and_dereference_analysis_data($hashref->{$overflow_key});
         }
 
         my $pptr = \$result_struct;
diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/NakedTableAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/NakedTableAdaptor.pm
index 34fe209ba..48e602d74 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/NakedTableAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/NakedTableAdaptor.pm
@@ -47,7 +47,18 @@ use base ('Bio::EnsEMBL::Hive::DBSQL::BaseAdaptor');
 sub slicer {    # take a slice of the hashref (if only we could inline in Perl!)
     my ($self, $hashref, $fields) = @_;
 
-    return [ @$hashref{@$fields} ];
+    my $overflow_limit = $self->overflow_limit();
+
+    return [ map { eval { my $value = $hashref->{$_};
+                          my $ol = $overflow_limit->{$_};
+                          if (defined($ol) and defined($value) and  (length($value) > $ol)) {
+                              $self->db->get_AnalysisDataAdaptor()->store_if_needed($value);
+                          } else {
+                              $value;
+                          }
+                        }
+
+                 } @$fields ];
 }
 
 
diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/ResourceDescriptionAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/ResourceDescriptionAdaptor.pm
index 888ad1853..57418a09d 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/ResourceDescriptionAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/ResourceDescriptionAdaptor.pm
@@ -58,6 +58,7 @@ sub default_insertion_method {
 sub overflow_limit {
     return {
         'submission_cmd_args' => 255,
+        'worker_cmd_args'     => 255,
     };
 }
 
diff --git a/t/02.api/fetch_and_count_by_multiple_columns.t b/t/02.api/fetch_and_count_by_multiple_columns.t
index 327dbdb3a..12ebceb42 100755
--- a/t/02.api/fetch_and_count_by_multiple_columns.t
+++ b/t/02.api/fetch_and_count_by_multiple_columns.t
@@ -21,7 +21,7 @@ use warnings;
 use Data::Dumper;
 use File::Temp qw{tempdir};
 
-use Test::More tests => 16;
+use Test::More tests => 11;
 
 use Bio::EnsEMBL::Hive::DBSQL::DBAdaptor;
 
@@ -40,9 +40,7 @@ my $pipeline_url      = 'sqlite:///ehive_test_pipeline_db';
 
 my $hive_dba    = init_pipeline('Bio::EnsEMBL::Hive::PipeConfig::LongMult_conf', $pipeline_url, [-hive_force_init => 1]);
 my $ana_a       = $hive_dba->get_AnalysisAdaptor;
-my $job_a       = $hive_dba->get_AnalysisJobAdaptor;
 my $dfr_a       = $hive_dba->get_DataflowRuleAdaptor;
-my $ada_a       = $hive_dba->get_AnalysisDataAdaptor;
 
 is($ana_a->count_all(), 3, 'There are 3 analyses in the pipeline');
 is($ana_a->count_all_by_logic_name('take_b_apart'), 1, 'But only 1 "take_b_apart"');
@@ -62,30 +60,6 @@ is($matching_analyses->[0]->to_analysis_url, 'part_multiply', 'Correct target lo
 
 is($dfr_a->count_all_by_branch_code(1), 3, 'There are 2 #1 branches in the pipeline');
 
-my $long_input_id = sprintf('{ "long_param" => "%s" }', 'tmp' x 1000);
-my $new_job = Bio::EnsEMBL::Hive::AnalysisJob->new(
-    'input_id'      => $long_input_id,
-    'analysis_id'   => 1,
-);
-
-# Test the overflow to the analysis_data table
-is($ada_a->count_all(), 0, "Nothing in the analysis_data table (yet)");
-
-$job_a->store($new_job);
-is($ada_a->count_all(), 1, "1 entry in the analysis_data table");
-
-is($ada_a->fetch_by_data_TO_analysis_data_id('unmatched input_id'), undef, 'fetch_by_data_to_analysis_data_id() returns undef when it cannot find the input_id');
-my $ext_data_id = $ada_a->fetch_by_data_TO_analysis_data_id($long_input_id);
-is($ext_data_id, 1, 'analysis_data_id starts at 1');
-
-my $another_job = Bio::EnsEMBL::Hive::AnalysisJob->new(
-    'input_id'      => $long_input_id,
-    'analysis_id'   => 2,
-);
-
-$job_a->store($another_job);
-is($ada_a->count_all(), 1, "still 1 entry in the analysis_data table");
-
 done_testing();
 
 chdir $original;
diff --git a/t/02.api/overflow.t b/t/02.api/overflow.t
new file mode 100755
index 000000000..7382abd09
--- /dev/null
+++ b/t/02.api/overflow.t
@@ -0,0 +1,183 @@
+#!/usr/bin/env perl
+# Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
+# Copyright [2016-2018] EMBL-European Bioinformatics Institute
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+use strict;
+use warnings;
+
+use Data::Dumper;
+use File::Temp qw{tempdir};
+
+use Test::More tests => 18;
+
+use Bio::EnsEMBL::Hive::DBSQL::DBAdaptor;
+use Bio::EnsEMBL::Hive::ResourceClass;
+
+use Bio::EnsEMBL::Hive::Utils::Test qw(init_pipeline);
+
+# eHive needs this to initialize the pipeline (and run db_cmd.pl)
+use Cwd            ();
+use File::Basename ();
+$ENV{'EHIVE_ROOT_DIR'} ||= File::Basename::dirname( File::Basename::dirname( File::Basename::dirname( Cwd::realpath($0) ) ) );
+
+my $dir = tempdir CLEANUP => 1;
+chdir $dir;
+
+my $pipeline_url      = 'sqlite:///ehive_test_pipeline_db';
+
+my $hive_dba    = init_pipeline('Bio::EnsEMBL::Hive::PipeConfig::LongMult_conf', $pipeline_url, [-hive_force_init => 1]);
+
+my $job_a       = $hive_dba->get_AnalysisJobAdaptor;
+my $rcl_a       = $hive_dba->get_ResourceClassAdaptor;
+my $rde_a       = $hive_dba->get_ResourceDescriptionAdaptor;
+my $dfr_a       = $hive_dba->get_DataflowRuleAdaptor;
+my $ada_a       = $hive_dba->get_AnalysisDataAdaptor;
+my $acu_a       = $hive_dba->get_AccumulatorAdaptor;
+my $acr_a       = $hive_dba->get_AnalysisCtrlRuleAdaptor;
+my $ana_a       = $hive_dba->get_AnalysisAdaptor;
+
+my $long_input_id = sprintf('{ "long_param" => "%s" }', 'tmp' x 1000);
+my $new_job = Bio::EnsEMBL::Hive::AnalysisJob->new(
+    'input_id'      => $long_input_id,
+    'analysis_id'   => 1,
+);
+
+# Test the overflow into the analysis_data table
+# Test overflow for input_id
+is($ada_a->count_all(), 0, "Nothing in the analysis_data table (yet)");
+
+$job_a->store($new_job);
+is($ada_a->count_all(), 1, "1 entry in the analysis_data table");
+
+is($ada_a->fetch_by_data_TO_analysis_data_id('unmatched input_id'), undef, 'fetch_by_data_to_analysis_data_id() returns undef when it cannot find the input_id');
+my $ext_data_id = $ada_a->fetch_by_data_TO_analysis_data_id($long_input_id);
+is($ext_data_id, 1, 'analysis_data_id starts at 1');
+
+my $fan_job = Bio::EnsEMBL::Hive::AnalysisJob->new(
+    'input_id'      => $long_input_id,
+    'analysis_id'   => 2,
+);
+
+$job_a->store($fan_job);
+is($ada_a->count_all(), 1, "still 1 entry in the analysis_data table");
+
+# Test overflow for resource description args
+
+Bio::EnsEMBL::Hive::DBSQL::DBAdaptor->init_collections();
+my $new_rc = Bio::EnsEMBL::Hive::ResourceClass->add_new_or_update(
+    'name' => 'testresourceclass',
+);
+
+my $long_sca = 'sc' x 129;
+my $long_wca = 'wc' x 129;
+my $new_rd = Bio::EnsEMBL::Hive::ResourceDescription->add_new_or_update(
+    'resource_class'      => $new_rc,
+    'meadow_type'         => 'test_meadow',
+    'submission_cmd_args' => $long_sca,
+    'worker_cmd_args'     => $long_wca,
+);
+
+$rcl_a->store($new_rc);
+$rde_a->store($new_rd);
+is($ada_a->count_all(), 3, "New resource description overflowed two entries to analysis_data, total 3");
+
+# Test overflow for to_analysis_urls
+
+my $long_struct_name = 'ta' x 129;
+my $long_to_analysis_url = ':////accu?' . $long_struct_name;
+my $new_dfr = Bio::EnsEMBL::Hive::DataflowRule->add_new_or_update(
+    'from_analysis'     => $ana_a->fetch_by_dbID(1),
+    'to_analysis_url'   => $long_to_analysis_url,
+    'branch_code'       => 3,
+);
+
+$dfr_a->store($new_dfr);
+is($ada_a->count_all(), 4, "New to_analysis_url overflowed an entry to analysis_data, total 4");
+
+# Test overflow for condition analysis urls
+
+my $long_cau = 'cau' x 86;
+my $ctrled_analysis_id = 1;
+my $new_acr = Bio::EnsEMBL::Hive::AnalysisCtrlRule->add_new_or_update(
+    'condition_analysis_url' => $long_cau,
+    'ctrled_analysis'     => $ana_a->fetch_by_dbID($ctrled_analysis_id),
+);
+
+$acr_a->store($new_acr);
+is($ada_a->count_all(), 5, "New condition_analysis_url overflowed an entry to analysis_data, total 5");
+
+# Test overflow for accu key_signatures
+# Note: AccumulatorAdaptor will complain if storing an accu without a proper fan job
+# and semaphored funnel job
+
+my $accu_funnel_job =  Bio::EnsEMBL::Hive::AnalysisJob->new(
+    'input_id'      => {},
+    'analysis_id'   => 3,
+);
+$job_a->store($accu_funnel_job);
+
+my $accu_fan_job =  Bio::EnsEMBL::Hive::AnalysisJob->new(
+    'input_id'      => {},
+    'analysis_id'   => 2,
+    'semaphored_job_id' => $accu_funnel_job->dbID,
+);
+$job_a->store($accu_fan_job);
+
+my $new_accu = Bio::EnsEMBL::Hive::Accumulator->new(
+    adaptor            => $acu_a,
+    struct_name        => $long_struct_name,
+    signature_template => '{key}',
+);
+
+my $long_key_signature = 'ks' x 129;
+my $long_output_id = [ { 'key' => $long_key_signature,
+                         $long_struct_name => 1, } ];
+$new_accu->dataflow(
+    $long_output_id,
+    $accu_fan_job,
+);
+
+is($ada_a->count_all(), 7, "Overflow for long struct_name and key_signature in accu");
+
+# Test retrieval of overflow data
+
+my $fetched_rds = $rde_a->fetch_all();
+my $rd_with_long_args;
+foreach my $fetched_rd (@$fetched_rds) {
+    if ($fetched_rd->resource_class_id() == $new_rc->dbID) {
+        $rd_with_long_args = $fetched_rd;
+    }
+}
+
+is($rd_with_long_args->submission_cmd_args, $long_sca, "Retrieved long submission_cmd_args");
+is($rd_with_long_args->worker_cmd_args, $long_wca, "Retrieved long worker_cmd_args");
+
+my $fetched_dfr = $dfr_a->fetch_by_dbID($new_dfr->dbID);
+is ($fetched_dfr->to_analysis_url, $long_to_analysis_url, "Retrieved long to_analysis_url");
+
+my $fetched_acr = $acr_a->fetch_by_ctrled_analysis_id($ctrled_analysis_id);
+is ($fetched_acr->condition_analysis_url, $long_cau, "Retrieved long condition_analysis_url");
+
+# $fetched_accu_structures->{$receiving_job_id}->{$struct_name}->{$key_signature} = value
+my $fetched_accu_structures = $acu_a->fetch_structures_for_job_ids($accu_funnel_job->dbID);
+my $fetched_accu_hash = $fetched_accu_structures->{$accu_funnel_job->dbID};
+my $fetched_struct_name = (keys(%$fetched_accu_hash))[0];
+my $fetched_key_signature = (keys(%$fetched_accu_hash->{$fetched_struct_name}))[0];
+
+is ($fetched_struct_name, $long_struct_name, "fetched long struct_name from accu");
+is ($fetched_key_signature, $long_key_signature, "fetched long key_signature from accu");
+
+done_testing();

From 1beb0a53bad9ca46dc4640da5c7f86f66de3e1fa Mon Sep 17 00:00:00 2001
From: ens-bwalts <bwalts@ebi.ac.uk>
Date: Tue, 28 Aug 2018 13:34:25 +0100
Subject: [PATCH 3/5] syntax for hashref element access changes between Perl
 versions

---
 t/02.api/overflow.t | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/02.api/overflow.t b/t/02.api/overflow.t
index 7382abd09..6b041b238 100755
--- a/t/02.api/overflow.t
+++ b/t/02.api/overflow.t
@@ -175,7 +175,7 @@ is ($fetched_acr->condition_analysis_url, $long_cau, "Retrieved long condition_a
 my $fetched_accu_structures = $acu_a->fetch_structures_for_job_ids($accu_funnel_job->dbID);
 my $fetched_accu_hash = $fetched_accu_structures->{$accu_funnel_job->dbID};
 my $fetched_struct_name = (keys(%$fetched_accu_hash))[0];
-my $fetched_key_signature = (keys(%$fetched_accu_hash->{$fetched_struct_name}))[0];
+my $fetched_key_signature = (keys(%{$fetched_accu_hash->{$fetched_struct_name}}))[0];
 
 is ($fetched_struct_name, $long_struct_name, "fetched long struct_name from accu");
 is ($fetched_key_signature, $long_key_signature, "fetched long key_signature from accu");

From d657f9dd70abc915e797f205493cc488420796be Mon Sep 17 00:00:00 2001
From: ens-bwalts <bwalts@ebi.ac.uk>
Date: Wed, 3 Apr 2019 09:38:41 +0100
Subject: [PATCH 4/5] Suggested improvements to overflow code, including moving
 slicer to BaseAdaptor

---
 .../EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm  |  1 -
 modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm | 74 ++++++++++++++++++-
 .../EnsEMBL/Hive/DBSQL/NakedTableAdaptor.pm   | 18 -----
 .../Bio/EnsEMBL/Hive/DBSQL/ObjectAdaptor.pm   | 19 -----
 t/02.api/overflow.t                           | 35 +++++++--
 5 files changed, 101 insertions(+), 46 deletions(-)

diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm
index c342d6658..3d41b5a4b 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/AccumulatorAdaptor.pm
@@ -51,7 +51,6 @@ sub default_table_name {
 sub overflow_limit {
     return {
         'key_signature' => 255,
-        'struct_name'   => 255,
     };
 }
 
diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm
index c991e79fb..ff59e5bb9 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm
@@ -40,6 +40,7 @@ use strict;
 use warnings;
 no strict 'refs';   # needed to allow AUTOLOAD create new methods
 use DBI 1.6;        # the 1.6 functionality is important for detecting autoincrement fields and other magic.
+use Scalar::Util ('blessed');
 
 use Bio::EnsEMBL::Hive::Utils ('stringify', 'throw');
 
@@ -138,6 +139,26 @@ sub overflow_limit {
     return $self->{_overflow_limit} || $self->default_overflow_limit();
 }
 
+=head2 size_limit
+
+Arg[1]      : (optional) Hashref with column names as keys and column size as values
+Description : Getter/setter for column size limits for the implementing adaptor.
+              If no size limit is set, this method will call _table_info_loader() to
+              get sizes from the database schema.
+Returntype  : Hashref
+
+=cut
+
+sub size_limit {
+    my $self = shift @_;
+
+    if(@_) {   # setter
+        $self->{_size_limit} = shift @_;
+    } elsif ( !defined( $self->{_size_limit} ) ) {
+        $self->_table_info_loader();
+    }
+    return $self->{_size_limit};
+}
 
 sub input_column_mapping {
     my $self = shift @_;
@@ -229,17 +250,19 @@ sub _table_info_loader {
     my $table_name  = $self->table_name();
 
     my %column_set  = ();
+    my %size_limit  = ();
     my $autoinc_id  = '';
     my @primary_key = $dbh->primary_key(undef, undef, $table_name);
 
     my $sth = $dbh->column_info(undef, undef, $table_name, '%');
     $sth->execute();
     while (my $row = $sth->fetchrow_hashref()) {
-        my ( $column_name, $column_type ) = @$row{'COLUMN_NAME', 'TYPE_NAME'};
+        my ( $column_name, $column_type, $size_limit ) = @$row{'COLUMN_NAME', 'TYPE_NAME', 'COLUMN_SIZE'};
 
         # warn "ColumnInfo [$table_name/$column_name] = $column_type\n";
 
         $column_set{$column_name}  = $column_type;
+        $size_limit{$column_name}  = $size_limit;
 
         if( ($column_name eq $table_name.'_id')
          or ($table_name eq 'analysis_base' and $column_name eq 'analysis_id') ) {    # a special case (historical)
@@ -249,6 +272,7 @@ sub _table_info_loader {
     $sth->finish;
 
     $self->column_set(  \%column_set );
+    $self->size_limit(  \%size_limit );
     $self->primary_key( \@primary_key );
     $self->autoinc_id(   $autoinc_id );
 }
@@ -552,6 +576,54 @@ sub check_and_dereference_analysis_data {
     }
 }
 
+sub slicer {
+    my ($self, $sliceable, $fields) = @_;
+
+    my $is_object = blessed($sliceable) ? 1 : 0;
+
+    my $autoinc_id;
+    if ($is_object) {
+        $autoinc_id      = $self->autoinc_id();
+    }
+    my $overflow_limit  = $self->overflow_limit();
+    my $size_limit      = $self->size_limit();
+
+    my @slice;
+    # keep track of any values needing overflow, so that overflow can
+    # be deferred until after checking all fields for size limit violations
+    my %needs_overflow;
+
+    for (my $i = 0; $i <= $#{$fields}; $i++) {
+        my $field = $fields->[$i];
+        my $value = $is_object ? $sliceable->$field() : $sliceable->{$field};
+        my $ol = $overflow_limit->{$field};
+        my $sl = $size_limit->{$field};
+
+        if ($is_object && $field eq $autoinc_id) {
+            $slice[$i] = $sliceable->dbID();
+        } elsif (defined($ol) and defined($value) and (length($value) > $ol)) {
+            # if overflow limit exists for this field, we can ignore
+            # any size limit since an excessively large field will be
+            # handled through overflow
+            $needs_overflow{$i} = $value;
+        } elsif (defined($sl) and defined($value) and (length($value) > $sl)) {
+            # if no overflow limit, then check size and generate a meaningful error
+            # as some RDBMS implementations fail silently or misleadingly when trying
+            # to store an oversize value
+            throw("length of value for column \"" . $field .
+                  "\" exceeds the maximum size, which is $sl");
+        } else {
+            $slice[$i] = $value;
+        }
+    }
+
+    foreach my $fields_index (keys(%needs_overflow)) {
+        $slice[$fields_index] =
+            $self->db->get_AnalysisDataAdaptor()->store_if_needed($needs_overflow{$fields_index});
+    }
+
+    return \@slice;
+}
 
 sub DESTROY { }   # to simplify AUTOLOAD
 
diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/NakedTableAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/NakedTableAdaptor.pm
index 48e602d74..307512803 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/NakedTableAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/NakedTableAdaptor.pm
@@ -44,24 +44,6 @@ use Bio::EnsEMBL::Hive::NakedTable;
 use base ('Bio::EnsEMBL::Hive::DBSQL::BaseAdaptor');
 
 
-sub slicer {    # take a slice of the hashref (if only we could inline in Perl!)
-    my ($self, $hashref, $fields) = @_;
-
-    my $overflow_limit = $self->overflow_limit();
-
-    return [ map { eval { my $value = $hashref->{$_};
-                          my $ol = $overflow_limit->{$_};
-                          if (defined($ol) and defined($value) and  (length($value) > $ol)) {
-                              $self->db->get_AnalysisDataAdaptor()->store_if_needed($value);
-                          } else {
-                              $value;
-                          }
-                        }
-
-                 } @$fields ];
-}
-
-
 sub objectify {    # pretend the hashref becomes an object (if only we could inline in Perl!)
     return pop @_;
 }
diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/ObjectAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/ObjectAdaptor.pm
index b3126e86f..f833f47bf 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/ObjectAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/ObjectAdaptor.pm
@@ -47,25 +47,6 @@ sub object_class {
     die "Please define object_class() in your specific adaptor class to return the class name of your intended object";
 }
 
-
-sub slicer {    # take a slice of the object (if only we could inline in Perl!)
-    my ($self, $object, $fields) = @_;
-
-    my $autoinc_id      = $self->autoinc_id();
-    my $overflow_limit  = $self->overflow_limit();
-
-    return [ map { ($_ eq $autoinc_id)
-                    ? $object->dbID()
-                    : eval { my $value  = $object->$_();
-                             my $ol     = $overflow_limit->{$_};
-                             (defined($ol) and defined($value) and length($value)>$ol)
-                                ? $self->db->get_AnalysisDataAdaptor()->store_if_needed( $value )
-                                : $value
-                      }
-                 } @$fields ];
-}
-
-
 sub objectify { # turn the hashref into an object (if only we could inline in Perl!)
     my ($self, $hashref) = @_;
 
diff --git a/t/02.api/overflow.t b/t/02.api/overflow.t
index 6b041b238..259e96541 100755
--- a/t/02.api/overflow.t
+++ b/t/02.api/overflow.t
@@ -1,6 +1,6 @@
 #!/usr/bin/env perl
 # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
-# Copyright [2016-2018] EMBL-European Bioinformatics Institute
+# Copyright [2016-2019] EMBL-European Bioinformatics Institute
 # 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,7 +21,8 @@ use warnings;
 use Data::Dumper;
 use File::Temp qw{tempdir};
 
-use Test::More tests => 18;
+use Test::More tests => 20;
+use Test::Exception;
 
 use Bio::EnsEMBL::Hive::DBSQL::DBAdaptor;
 use Bio::EnsEMBL::Hive::ResourceClass;
@@ -145,12 +146,32 @@ my $new_accu = Bio::EnsEMBL::Hive::Accumulator->new(
 my $long_key_signature = 'ks' x 129;
 my $long_output_id = [ { 'key' => $long_key_signature,
                          $long_struct_name => 1, } ];
-$new_accu->dataflow(
-    $long_output_id,
-    $accu_fan_job,
+
+throws_ok { $new_accu->dataflow(
+                  $long_output_id,
+                  $accu_fan_job,)
+            } qr/length of value/,
+            "exception to insert of oversize struct name";
+
+is($ada_a->count_all(), 5, "Oversize struct name not inserted");
+
+my $short_struct_name = 'ssn';
+
+my $sslk_accu = Bio::EnsEMBL::Hive::Accumulator->new(
+    adaptor            => $acu_a,
+    struct_name        => $short_struct_name,
+    signature_template => '{key}',
 );
 
-is($ada_a->count_all(), 7, "Overflow for long struct_name and key_signature in accu");
+my $sslk_output_id = [ { 'key' => $long_key_signature,
+                         'ssn' => 1, } ];
+
+$sslk_accu->dataflow(
+    $sslk_output_id,
+    $accu_fan_job,);
+
+is($ada_a->count_all(), 6, "Oversize key signature overflow");
+
 
 # Test retrieval of overflow data
 
@@ -177,7 +198,7 @@ my $fetched_accu_hash = $fetched_accu_structures->{$accu_funnel_job->dbID};
 my $fetched_struct_name = (keys(%$fetched_accu_hash))[0];
 my $fetched_key_signature = (keys(%{$fetched_accu_hash->{$fetched_struct_name}}))[0];
 
-is ($fetched_struct_name, $long_struct_name, "fetched long struct_name from accu");
+is ($fetched_struct_name, $short_struct_name, "fetched short struct_name from accu");
 is ($fetched_key_signature, $long_key_signature, "fetched long key_signature from accu");
 
 done_testing();

From 8beee26bde585fe008dce36140b44b32c6dd20e6 Mon Sep 17 00:00:00 2001
From: ens-bwalts <bwalts@ebi.ac.uk>
Date: Wed, 3 Apr 2019 13:44:20 +0100
Subject: [PATCH 5/5] When using PostgreSQL, only set a size_limit for VARCHAR
 columns

---
 modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm
index ff59e5bb9..fc853a1a4 100644
--- a/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm
+++ b/modules/Bio/EnsEMBL/Hive/DBSQL/BaseAdaptor.pm
@@ -262,7 +262,14 @@ sub _table_info_loader {
         # warn "ColumnInfo [$table_name/$column_name] = $column_type\n";
 
         $column_set{$column_name}  = $column_type;
-        $size_limit{$column_name}  = $size_limit;
+
+        # PostgreSQL reports a COLUMN_SIZE of 4 for enums, which is not compatible with
+        # the way slicer does column size checking. Likewise, PostgreSQL reports
+        # a user-defined TYPE_NAME for enums, rather than 'enum'. Therefore, if
+        # the DB is PostgreSQL, only set size_limit for varchars
+        unless (($driver eq 'pgsql') && !($column_type eq 'character varying')) {
+            $size_limit{$column_name}  = $size_limit;
+        }
 
         if( ($column_name eq $table_name.'_id')
          or ($table_name eq 'analysis_base' and $column_name eq 'analysis_id') ) {    # a special case (historical)