Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Facilitate CACTUS_DB alignment views in all divisions #1061

Merged
merged 4 commits into from
Mar 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions modules/EnsEMBL/Web/Component/Compara_Alignments.pm
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,15 @@ sub content {
my ($align, $target_species, $target_slice_name_range) = split '--', $align_param;
my $target_slice = $object->get_target_slice;

## Check for wrong component in per-component polyploid alignments.
my $warning_box = $self->check_for_wrong_genome_component({
'cdb' => $cdb,
'align' => $align,
'slice' => $slice,
});
return $warning_box if $warning_box;
##

my ($alert_box, $error) = $self->check_for_align_problems({
'align' => $align,
'species' => $hub->species_defs->SPECIES_PRODUCTION_NAME,
Expand Down Expand Up @@ -339,6 +348,9 @@ sub check_for_missing_species {
return $warnings ? ({'severity' => 'info', 'title' => $title, 'message' => $warnings}) : ();
}

# Stub for use in divisions with polyploid genomes (e.g. Plants).
#
# content() calls this with a hashref holding the keys 'cdb', 'align' and
# 'slice', and returns the result straight away as a warning box if it is
# true. This base implementation deliberately yields nothing (undef in scalar
# context, an empty list in list context), so content() carries on as normal.
# Division-specific code can override it to return warning-box HTML.
sub check_for_wrong_genome_component {
  return;
}

sub show_warnings {
my ($self, $messages) = @_;
return '' unless defined $messages;
Expand Down
53 changes: 53 additions & 0 deletions modules/EnsEMBL/Web/Component/Location/Compara_AlignSliceBottom.pm
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ sub content {
my %aligned_species = map { $_->{'name'} => 1 } @$slices;
my $i = 1;
my (@images, $html);

if ($align_details->{'type'} eq 'CACTUS_DB') {
$html .= $self->show_scale_dependent_track_info_box($align_details);
}

my ($caption_height,$caption_img_offset) = (0,-24);
my $lookup = $species_defs->prodnames_to_urls_lookup;
Expand Down Expand Up @@ -141,6 +145,55 @@ sub content {
return $html;
}

## Builds the info box explaining that some tracks of a Cactus alignment image
## are switched off by default when the viewed region is large.
##
## @param $align_details  Hashref of alignment details. Only the optional
##                        'as_track_threshold_data' entry is read here: a
##                        hashref that may hold 'transcript' and 'sequence'
##                        thresholds. Both are compared against a region length
##                        in base pairs, so they are treated as base pairs.
## @return HTML of the info box (as produced by $self->_info) when the current
##         region is at least as long as the 'transcript' threshold; otherwise
##         an empty string, so callers can safely append the result.
sub show_scale_dependent_track_info_box {
  my ($self, $align_details) = @_;

  my $html = '';
  return $html unless exists $align_details->{'as_track_threshold_data'};

  my $as_track_thresholds = $align_details->{'as_track_threshold_data'};
  return $html unless exists $as_track_thresholds->{'transcript'};

  ## Work out the length of the region being viewed from the 'r' parameter.
  my $r = $self->param('r');
  $r = '' unless defined $r;

  my $location_length;
  if ($r =~ /^[\w\.\-]+:(\d+)\-(\d+)$/) { # region pattern from MetaKeyFormat datacheck
    $location_length = abs($2 - $1) + 1;
  } else {
    $location_length = 1; # This should never happen, but if it does, we revert to default behaviour.
  }

  return $html if $location_length < $as_track_thresholds->{'transcript'};

  ## Thresholds are in base pairs, so convert before labelling them as kb.
  ## Exact multiples of 1000 are shown in kb; anything else is shown in bp so
  ## that the figure quoted is never rounded into something misleading.
  ## NOTE(review): thousandify presumably returns a separator-formatted string
  ## (e.g. "1,000"); it is therefore interpolated with %s, never %d, which
  ## would truncate such a string at the first separator.
  my $describe_threshold = sub {
    my ($threshold_bp) = @_;
    return $threshold_bp > 0 && $threshold_bp % 1000 == 0
      ? sprintf('%s kb', $self->thousandify(int($threshold_bp / 1000)))
      : sprintf('%s bp', $self->thousandify($threshold_bp));
  };

  my @range_vis_info_parts = (
    sprintf(
      'gene tracks hidden in regions larger than %s',
      $describe_threshold->($as_track_thresholds->{'transcript'}),
    )
  );

  if (exists $as_track_thresholds->{'sequence'}) {
    push(
      @range_vis_info_parts,
      sprintf(
        'contig tracks hidden in regions larger than %s',
        $describe_threshold->($as_track_thresholds->{'sequence'}),
      )
    );
  }

  ## Look up the help record id for the alignment image help page.
  my $species_defs = $self->hub->species_defs;
  my $help_id      = { $species_defs->multiX('ENSEMBL_HELP') }->{'Location/Compara_Alignments/Image'};

  $html .= $self->_info('Scale-dependent alignment track configuration',
    '<p>Some tracks in this Cactus image alignment are disabled by default at larger scales, with '
    . join(', and ', @range_vis_info_parts)
    . '. Tracks hidden in this way can be revealed by zooming in, or by enabling them directly'
    . ' via "<strong>Configure this page</strong>" or "<strong>Add/remove tracks</strong>".'
    . sprintf(' For more information, see the <a href="/Help/View?id=%d" class="popup">Alignments (image) help page</a>.', $help_id)
  );

  return $html;
}

## Returns a hashref of export settings for this component: the 'action' to
## use and the 'caption' text for the download option.
sub export_options {
  my %options = (
    'action'  => 'Alignments',
    'caption' => 'Download alignment',
  );

  return \%options;
}

sub get_export_data {
Expand Down
16 changes: 16 additions & 0 deletions modules/EnsEMBL/Web/ConfigPacker.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,7 @@ sub _summarise_compara_db {
my $constrained_elements = {};
my %valid_species = map { $_ => 1 } keys %{$self->full_tree};

my $cactus_db_found = 0;
foreach my $row (@$res_aref) {
my ($class, $type, $species, $name, $id, $species_set_id) = ($row->[0], uc $row->[1], $row->[2], $row->[3], $row->[4], $row->[5]);
my $key = 'ALIGNMENTS';
Expand All @@ -1262,6 +1263,8 @@ sub _summarise_compara_db {
$constrained_elements->{$species_set_id} = $id;
} elsif ($type !~ /EPO_LOW_COVERAGE/ && ($class =~ /tree_alignment/ || $type =~ /EPO/)) {
$self->db_tree->{$db_name}{$key}{$id}{'species'}{'ancestral_sequences'} = 1 unless exists $self->db_tree->{$db_name}{$key}{$id};
} elsif ($type eq 'CACTUS_DB') {
$cactus_db_found = 1;
}

if ($intra_species{$species_set_id}) {
Expand All @@ -1275,6 +1278,19 @@ sub _summarise_compara_db {
$self->db_tree->{$db_name}{$key}{$id}{'species_set_id'} = $species_set_id;
$self->db_tree->{$db_name}{$key}{$id}{'species'}{$species} = 1;
}

if ($cactus_db_found) {
$res_aref = $dbh->selectall_arrayref('
select method_link_species_set_id, value
from method_link_species_set_tag
where tag = "align_slice_track_threshold_data"
');

foreach my $row (@$res_aref) {
my ($alignment_id, $as_track_threshold_json) = ($row->[0], $row->[1]);
$self->db_tree->{$db_name}{'ALIGNMENTS'}{$alignment_id}{'as_track_threshold_data'} = from_json($as_track_threshold_json);
}
}

foreach my $species_set_id (keys %$constrained_elements) {
my $constr_elem_id = $constrained_elements->{$species_set_id};
Expand Down
33 changes: 18 additions & 15 deletions modules/EnsEMBL/Web/Document/HTML/Compara.pm
Original file line number Diff line number Diff line change
Expand Up @@ -333,15 +333,26 @@ sub get_species_info {
}
}

## Lookup table from species name to genome_db
my $genome_db_name_hash = {};
## Lookup table from species name to genome_db stats
my $genome_db_stats_hash = {};
if ($mlss) {
my $genome_db_id_2_node_hash = $mlss && $mlss->species_tree && $mlss->species_tree->get_genome_db_id_2_node_hash;
foreach my $genome_db (@{$mlss->species_set->genome_dbs}) {
## Set coverage stats from species-tree node tag or MLSS tag, as available
my $id = $genome_db->dbID;
my $species_tree_name = $genome_db->name;
$genome_db_name_hash->{$species_tree_name} = $genome_db;
$genome_db_stats_hash->{$species_tree_name}{'assembly'} = $genome_db->assembly;
my @stats = qw(genome_coverage genome_length coding_exon_coverage coding_exon_length);
foreach (@stats) {
if ($genome_db_id_2_node_hash && exists $genome_db_id_2_node_hash->{$id}
&& defined $genome_db_id_2_node_hash->{$id}->get_value_for_tag($_)) {
$genome_db_stats_hash->{$species_tree_name}{$_} = $genome_db_id_2_node_hash->{$id}->get_value_for_tag($_);
} elsif (defined $mlss->get_value_for_tag($_.'_'.$id)) {
$genome_db_stats_hash->{$species_tree_name}{$_} = $mlss->get_value_for_tag($_.'_'.$id);
}
}
}
}
my $genome_db_id_2_node_hash = $mlss && $mlss->species_tree && $mlss->species_tree->get_genome_db_id_2_node_hash;

## Now munge information for selected species
foreach my $sp (@$species_order) {
Expand All @@ -357,17 +368,9 @@ sub get_species_info {

if ($mlss) {
my $prod_name = $hub->species_defs->get_config($sp, 'SPECIES_PRODUCTION_NAME');
my $gdb = $genome_db_name_hash->{$prod_name};
$info->{$sp}{'assembly'} = $gdb->assembly;
## Add coverage stats
my $id = $gdb->dbID;
my @stats = qw(genome_coverage genome_length coding_exon_coverage coding_exon_length);
foreach (@stats) {
if ($genome_db_id_2_node_hash && exists $genome_db_id_2_node_hash->{$id} && defined $genome_db_id_2_node_hash->{$id}->get_value_for_tag($_)) {
$info->{$sp}{$_} = $genome_db_id_2_node_hash->{$id}->get_value_for_tag($_);
} else {
$info->{$sp}{$_} = $mlss->get_value_for_tag($_.'_'.$id);
}
## Add assembly name and coverage stats
while ( my ($key, $value) = each(%{$genome_db_stats_hash->{$prod_name}}) ) {
$info->{$sp}{$key} = $value;
}
}
}
Expand Down
13 changes: 0 additions & 13 deletions modules/EnsEMBL/Web/ImageConfig/alignsliceviewbottom.pm
Original file line number Diff line number Diff line change
Expand Up @@ -101,19 +101,6 @@ sub init_cacheable {
[ 'variation_legend' ],
{ accumulate => 'yes' }
);

my $align_params = $self->hub->referer->{'params'}{'align'}[0];
my ($align) = split '--', $align_params;
my $align_type = $self->species_defs->multi_hash->{'DATABASE_COMPARA'}{'ALIGNMENTS'}{$align}{'type'};
if ($align_type eq 'CACTUS_DB') {
my $node = $self->get_node('transcript');
my @transcript_tracks = grep { $_->get_data('node_type') eq 'track' } @{$node->get_all_nodes};
foreach my $transcript_track (@transcript_tracks) {
if ($species ne $self->hub->referer->{'ENSEMBL_SPECIES'}) {
$transcript_track->set_data('display', 'off');
}
}
}
}

sub species_list {
Expand Down
51 changes: 46 additions & 5 deletions modules/EnsEMBL/Web/ImageConfigExtension/Tracks.pm
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ sub add_genes {
}

# Adding gencode basic track, this has been moved from each image config to this generic one
if (my $gencode_version = $self->species_defs->GENCODE_VERSION || "") {
if (my $gencode_version = $self->species_defs->get_config($species, 'GENCODE_VERSION') || "") {
$self->add_track('transcript', 'gencode_basic', "Basic Gene Annotations from $gencode_version", '_gencode_basic', {
labelcaption => "Genes (Basic set from $gencode_version)",
display => 'off',
Expand All @@ -492,7 +492,7 @@ sub add_genes {
}

# Adding gencode primary track
if (my $gencode_version = $self->species_defs->GENCODE_VERSION || "") {
if (my $gencode_version = $self->species_defs->get_config($species, 'GENCODE_VERSION') || "") {
$self->add_track('transcript', 'gencode_primary', "Primary Gene Annotations from $gencode_version", '_gencode_primary', {
labelcaption => "Genes (Primary set from $gencode_version)",
display => 'transcript_label',
Expand All @@ -513,9 +513,9 @@ sub add_genes {
],
});
}

# Adding MANE tracks (Only for Humans)
if($self->hub->species_defs->SEPARATE_MANE_TRACKS){
if($self->species_defs->get_config($species, 'SEPARATE_MANE_TRACKS')){

# Adding MANE Select track
$self->add_track('transcript', 'mane_select', "MANE Select Transcripts", '_mane_select', {
Expand Down Expand Up @@ -557,7 +557,7 @@ sub add_genes {
# Need to add the gene menu track here
$self->add_track('information', 'gene_legend', 'Gene Legend', 'gene_legend', { strand => 'r' }) if $flag;

if($self->species_defs->GENCODE_VERSION) {
if($self->species_defs->get_config($species, 'GENCODE_VERSION')) {
# Disable comprehensive geneset track and enable primary gencode ones
$self->modify_configs(['transcript_core_ensembl'],{ 'display' => 'off' });
$self->modify_configs(['gencode_primary'], { 'display' => 'transcript_label' });
Expand All @@ -566,6 +566,47 @@ sub add_genes {
$self->modify_configs(['transcript_core_ensembl'],{ description => 'The <a class="popup" href="/Help/Glossary?id=487">GENCODE Comprehensive</a> set is the gene set for human and mouse' });
}

# If this is an alignslice track in a large-scale CACTUS_DB alignment
# view, disable selected tracks in order to reduce load times.
if ($self->type eq 'alignsliceviewbottom') {

my $align_id = exists $self->hub->referer->{'params'}{'align'}
? $self->hub->referer->{'params'}{'align'}[0]
: $self->hub->get_alignment_id
;

if ($align_id) {
my $align_details = $self->species_defs->multi_hash->{'DATABASE_COMPARA'}->{'ALIGNMENTS'}->{$align_id};
if ($align_details->{'type'} eq 'CACTUS_DB' && exists $align_details->{'as_track_threshold_data'}) {
my $location_param = $self->hub->referer->{'params'}{'r'}[0];

my $location_length;
if ($location_param =~ /^[\w\.\-]+:(\d+)\-(\d+)$/) { # region pattern from MetaKeyFormat datacheck
$location_length = abs($2 - $1) + 1;
} else {
$location_length = 1; # This should never happen, but if it does, we revert to default behaviour.
}

my $as_track_thresholds = $align_details->{'as_track_threshold_data'};
if (exists $as_track_thresholds->{'transcript'} && $location_length >= $as_track_thresholds->{'transcript'}) {

my $transcript_node_ids = $self->species_defs->get_config($species, 'GENCODE_VERSION')
? ['gencode_primary', 'transcript']
: ['transcript']
;

# At large scales, disable transcript tracks.
$self->modify_configs($transcript_node_ids, { 'display' => 'off' });

if (exists $as_track_thresholds->{'sequence'} && $location_length >= $as_track_thresholds->{'sequence'}) {
# At larger scales still, disable sequence tracks.
$self->modify_configs(['sequence'], { 'display' => 'off' });
}
}
}
}
}

}

sub add_trans_associated {
Expand Down