diff --git a/core/species.cpp b/core/species.cpp index 17851581..7d177c34 100644 --- a/core/species.cpp +++ b/core/species.cpp @@ -7684,6 +7684,7 @@ void Species::__CreateSubpopulationsFromTabulation(std::unordered_map pedigree_id_check; gSLiM_next_pedigree_id = 0; @@ -7809,12 +7810,12 @@ void Species::__CreateSubpopulationsFromTabulation(std::unordered_map genome_id_check; + + for (tsk_size_t j = 0; (size_t)j < node_count; j++) + { + tsk_size_t offset1 = node_table.metadata_offset[j]; + tsk_size_t offset2 = node_table.metadata_offset[j + 1]; + tsk_size_t length = (offset2 - offset1); + + // allow nodes with other types of metadata; but if the metadata length metches ours, we have to assume it's ours + if (length == sizeof(GenomeMetadataRec)) + { + // get the metadata record and check the genome pedigree ID + GenomeMetadataRec *metadata_rec = (GenomeMetadataRec *)(node_table.metadata + offset1); + slim_genomeid_t genome_id = metadata_rec->genome_id_; + + genome_id_check.emplace_back(genome_id); // we will test for collisions below + + slim_pedigreeid_t pedigree_id = genome_id / 2; // rounds down to integer + + if (pedigree_id >= gSLiM_next_pedigree_id) + { + static bool been_here = false; + + if (!been_here) + { + // decided to keep this as a warning; this circumstance is not necessarily pathological, but it probably usually is... + p_interpreter->ErrorOutputStream() << "#WARNING (Species::__CheckNodePedigreeIDs): in reading the tree sequence, a node was encountered with a genome pedigree ID that was (after division by 2) greater than the largest individual pedigree ID in the tree sequence. This is not necessarily an error, but it is highly unusual, and could indicate that the tree sequence file is corrupted. It may happen due to external manipulations of a tree sequence, or perhaps if an unsimplified tree sequence produced by SLiM is being loaded." << std::endl; + been_here = true; + } + + gSLiM_next_pedigree_id = pedigree_id + 1; + } + } + } + + // Check for genome pedigree ID collisions by sorting and looking for duplicates + std::sort(genome_id_check.begin(), genome_id_check.end()); + const auto duplicate = std::adjacent_find(genome_id_check.begin(), genome_id_check.end()); + + if (duplicate != genome_id_check.end()) + EIDOS_TERMINATION << "ERROR (Species::__CheckNodePedigreeIDs): the genome pedigree ID value " << *duplicate << " was used more than once; genome pedigree IDs must be unique." << EidosTerminate(); +} + void Species::_InstantiateSLiMObjectsFromTables(EidosInterpreter *p_interpreter, slim_tick_t p_metadata_tick, slim_tick_t p_metadata_cycle, SLiMModelType p_file_model_type, int p_file_version, SUBPOP_REMAP_HASH &p_subpop_map) { // set the tick and cycle from the provenance data @@ -8464,6 +8520,10 @@ void Species::_InstantiateSLiMObjectsFromTables(EidosInterpreter *p_interpreter, if (ret != 0) handle_error("_InstantiateSLiMObjectsFromTables tsk_treeseq_free()", ret); free(ts); + // Ensure that the next pedigree ID used will not cause a collision with any existing nodes in the node table, + // and that there are no duplicate node pedigree IDs in the input file (whether in use or not). + __CheckNodePedigreeIDs(p_interpreter); + // Set up the remembered genomes by looking though the list of nodes and their individuals if (remembered_genomes_.size() != 0) EIDOS_TERMINATION << "ERROR (Species::_InstantiateSLiMObjectsFromTables): (internal error) remembered_genomes_ is not empty." << EidosTerminate(); diff --git a/core/species.h b/core/species.h index 728ce458..2dc8c44d 100644 --- a/core/species.h +++ b/core/species.h @@ -605,6 +605,7 @@ class Species : public EidosDictionaryUnretained void __TallyMutationReferencesWithTreeSequence(std::unordered_map &p_mutMap, std::unordered_map p_nodeToGenomeMap, tsk_treeseq_t *p_ts); void __CreateMutationsFromTabulation(std::unordered_map &p_mutInfoMap, std::unordered_map &p_mutIndexMap); void __AddMutationsFromTreeSequenceToGenomes(std::unordered_map &p_mutIndexMap, std::unordered_map p_nodeToGenomeMap, tsk_treeseq_t *p_ts); + void __CheckNodePedigreeIDs(EidosInterpreter *p_interpreter); void _InstantiateSLiMObjectsFromTables(EidosInterpreter *p_interpreter, slim_tick_t p_metadata_tick, slim_tick_t p_metadata_cycle, SLiMModelType p_file_model_type, int p_file_version, SUBPOP_REMAP_HASH &p_subpop_map); // given tree-seq tables, makes individuals, genomes, and mutations slim_tick_t _InitializePopulationFromTskitTextFile(const char *p_file, EidosInterpreter *p_interpreter, SUBPOP_REMAP_HASH &p_subpop_map); // initialize the population from an tskit text file slim_tick_t _InitializePopulationFromTskitBinaryFile(const char *p_file, EidosInterpreter *p_interpreter, SUBPOP_REMAP_HASH &p_subpop_remap); // initialize the population from an tskit binary file