Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/cli-help' into rust
Browse files Browse the repository at this point in the history
  • Loading branch information
ivan-aksamentov committed Jan 13, 2025
2 parents c43ac39 + 8c6a781 commit 007411b
Show file tree
Hide file tree
Showing 9 changed files with 27 additions and 31 deletions.
4 changes: 2 additions & 2 deletions packages/pangraph/src/align/alignment_args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ pub struct AlignmentArgs {
#[clap(value_hint = ValueHint::Other)]
pub indel_len_threshold: usize,

/// Energy cost for introducing junction due to alignment merger
/// Energy cost for splitting a block during alignment merger. Controls graph fragmentation, see documentation.
#[default = 100.0]
#[clap(long, short = 'a', default_value_t = AlignmentArgs::default().alpha)]
#[clap(value_hint = ValueHint::Other)]
pub alpha: f64,

/// Energy cost for interblock diversity due to alignment merger
/// Energy cost for diversity in the alignment. A high value prevents merging of distantly-related sequences in the same block, see documentation.
#[default = 10.0]
#[clap(long, short = 'b', default_value_t = AlignmentArgs::default().beta)]
#[clap(value_hint = ValueHint::Other)]
Expand Down
12 changes: 2 additions & 10 deletions packages/pangraph/src/commands/build/build_args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ pub enum AlignmentBackend {
Mmseqs,
}

/// Align genomes into a multiple sequence alignment graph
/// Align genomes into a pangenome graph
#[derive(Parser, Debug)]
pub struct PangraphBuildArgs {
/// Path(s) to zero, one or multiple FASTA files with input sequences. Multiple records within one file are treated as separate genomes.
Expand Down Expand Up @@ -63,10 +63,6 @@ pub struct PangraphBuildArgs {
#[clap(long, short = 'c')]
pub circular: bool,

/// Transforms all sequences to upper case
#[clap(long, short = 'u')]
pub upper_case: bool,

/// Maximum number of alignment rounds to consider per pairwise graph merger
#[clap(long, short = 'x', default_value_t = 100)]
#[clap(value_hint = ValueHint::Other)]
Expand All @@ -82,11 +78,7 @@ pub struct PangraphBuildArgs {
#[clap(value_hint = ValueHint::Other)]
pub alignment_kernel: AlignmentBackend,

/// Verify that the original sequences can be reconstructed from the resulting pangraph
/// Sanity check: after construction, verify that the original sequences can be reconstructed exactly from the resulting pangraph. Raises an error otherwise.
#[clap(long, short = 'f')]
pub verify: bool,

/// Random seed for block id generation
#[clap(long)]
pub seed: Option<u64>,
}
5 changes: 1 addition & 4 deletions packages/pangraph/src/commands/build/build_run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,13 @@ use crate::pangraph::pangraph::Pangraph;
use crate::pangraph::strand::Strand::Forward;
use crate::tree::clade::postorder;
use crate::tree::neighbor_joining::build_tree_using_neighbor_joining;
use crate::utils::random::get_random_number_generator;
use crate::{make_internal_error, make_internal_report};
use eyre::{Report, WrapErr};
use itertools::Itertools;
use log::info;

pub fn build_run(args: &PangraphBuildArgs) -> Result<(), Report> {
let PangraphBuildArgs { input_fastas, seed, .. } = &args;

let rng = get_random_number_generator(seed);
let input_fastas = &args.input_fastas;

let fastas = read_many_fasta(input_fastas)?;

Expand Down
6 changes: 3 additions & 3 deletions packages/pangraph/src/commands/export/export_args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ pub enum PangraphExportArgs {
/// Export block consensus sequences to a fasta file
BlockConsensus(PangraphExportBlockConsensusArgs),

/// Export aligned or unaligned sequences for each block. Note that alignments exclude insertions
/// Export aligned or unaligned sequences for each block in separate fasta files. Note that alignments exclude insertions.
BlockSequences(PangraphExportBlockSequencesArgs),

/// Export the core-genome alignment
/// Export the core-genome alignment. Note that the alignment excludes insertions.
CoreGenome(PangraphExportCoreAlignmentArgs),
}

Expand Down Expand Up @@ -85,7 +85,7 @@ pub struct PangraphExportBlockSequencesArgs {
#[clap(display_order = 1, value_hint = ValueHint::FilePath)]
pub input_json: Option<PathBuf>,

/// Path to directory to write output FASTA files to
/// Path to directory to write output FASTA files to. Files are named `block_{block_id}.fa` in the folder.
///
/// See: https://en.wikipedia.org/wiki/FASTA_format
#[clap(long, short = 'o', value_hint = ValueHint::AnyPath)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use eyre::{Context, Report};

#[derive(Parser, Debug, Default, Clone)]
pub struct ExportBlockSequencesParams {
/// If set, then the full block sequences are exported but not aligned.
/// If set, then the full non-aligned block sequences are exported.
#[clap(long)]
pub unaligned: bool,
}
Expand Down
10 changes: 5 additions & 5 deletions packages/pangraph/src/commands/root_args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ fn styles() -> styling::Styles {
#[clap(styles = styles())]
/// Bioinformatic toolkit to align large sets of closely related genomes into a graph data structure.
///
/// Finds homology amongst large collections of closely related genomes. The core of the algorithm partitions each genome into pancontigs that represent a sequence interval related by vertical descent. Each genome is then an ordered walk along pancontigs; the collection of all genomes form a graph that captures all observed structural diversity. The tool useful to parsimoniously infer horizontal gene transfer events within a community; perform comparative studies of genome gain, loss, and rearrangement dynamics; or simply to compress many related genomes.
/// Finds homology amongst large collections of closely related genomes. The core of the algorithm partitions each genome into pancontigs (also called blocks) that represent a sequence interval related by vertical descent. Each genome is then an ordered walk along pancontigs. The collection of all genomes forms a graph that captures all observed structural diversity. The tool is useful to study structural variations in the genome; perform comparative studies of genome gain, loss, and rearrangement dynamics; or simply to compress many related genomes.
///
///
/// Publication: "PanGraph: scalable bacterial pan-genome graph construction. Nicholas Noll, Marco Molari, Richard Neher. bioRxiv 2022.02.24.481757; doi: https://doi.org/10.1101/2022.02.24.481757"
/// Publication: "PanGraph: scalable bacterial pan-genome graph construction." Nicholas Noll, Marco Molari, Richard Neher. Microbial Genomics 9.6 (2023): 001034. doi: https://doi.org/10.1099/mgen.0.001034
///
/// Documentation: https://pangraph.readthedocs.io/en/stable/
///
/// Source code:https://github.com/neherlab/pangraph
/// Source code: https://github.com/neherlab/pangraph
///
/// Questions, ideas, bug reports: https://github.com/neherlab/pangraph/issues
pub struct PangraphArgs {
Expand Down Expand Up @@ -71,10 +71,10 @@ pub enum PangraphCommands {
args: PangraphExportArgs,
},

/// Compute all pairwise marginalizations of a multiple sequence alignment graph
/// Generate a simplified graph that only contains a subset of the input genomes.
Simplify(PangraphSimplifyArgs),

/// Reconstruct input fasta sequences from graph
/// Reconstruct all input fasta sequences from graph
Reconstruct(PangraphReconstructArgs),

/// Generate JSON schema for Pangraph file format
Expand Down
2 changes: 1 addition & 1 deletion packages/pangraph/src/commands/simplify/simplify_args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use clap::{Parser, ValueHint};
use std::fmt::Debug;
use std::path::PathBuf;

/// Computes all pairwise marginalizations of a multiple sequence alignment graph
/// Generate a simplified graph that only contains a subset of the input genomes.
#[derive(Parser, Debug)]
pub struct PangraphSimplifyArgs {
/// Path to Pangraph JSON.
Expand Down
8 changes: 4 additions & 4 deletions packages/pangraph/src/io/fasta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,8 @@ impl<'a> FastaReader<'a> {
let fragment = self
.line
.chars()
.filter(|c| is_char_allowed(*c))
.map(|c| c.to_ascii_uppercase());
.map(|c| c.to_ascii_uppercase())
.filter(|c| is_char_allowed(*c));

record.seq.extend(fragment);

Expand All @@ -157,8 +157,8 @@ impl<'a> FastaReader<'a> {
let fragment = self
.line
.chars()
.filter(|c| is_char_allowed(*c))
.map(|c| c.to_ascii_uppercase());
.map(|c| c.to_ascii_uppercase())
.filter(|c| is_char_allowed(*c));

record.seq.extend(fragment);
}
Expand Down
9 changes: 8 additions & 1 deletion packages/pangraph/src/pangraph/graph_merging.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use crate::utils::interval::{have_no_overlap, Interval};
use crate::utils::map_merge::{map_merge, ConflictResolution};
use eyre::{Report, WrapErr};
use itertools::Itertools;
use log::{debug, trace};
use log::{debug, trace, warn};
use maplit::btreemap;
use ordered_float::OrderedFloat;
use rayon::prelude::*;
Expand Down Expand Up @@ -48,9 +48,16 @@ pub fn merge_graphs(
graph = graph_new;

// stop when no more mergers are possible
// or when the maximum number of iterations is reached
if !has_changed {
debug!("Graph merge {left_keys} <---> {right_keys} complete.");
break;
} else if i >= args.max_self_map {
warn!(
"Reached maximum number of self-merge iterations at graph merging {left_keys} <---> {right_keys}, consider increasing the current limit -x {}",
args.max_self_map
);
break;
}
i += 1;
}
Expand Down

0 comments on commit 007411b

Please sign in to comment.