diff --git a/Project.toml b/Project.toml index 3ba17ca..b762145 100644 --- a/Project.toml +++ b/Project.toml @@ -9,6 +9,7 @@ Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MbedTLS_jll = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" @@ -17,25 +18,24 @@ Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [compat] +Aqua = "0.8" CondaPkg = "0.2" Dates = "1" Distributed = "1" Graphs = "1.0" Images = "0.20, 0.26" +LinearAlgebra = "1" MbedTLS_jll = "2.28.6" Printf = "1" PythonCall = "0.9" Reexport = "1.0" Serialization = "1" Statistics = "1" -Aqua = "0.8" -JuliaFormatter = "1.0" Test = "1" julia = "1" [extras] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" -JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] diff --git a/docs/make.jl b/docs/make.jl index fa219b7..1e0c0ab 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -19,6 +19,7 @@ makedocs(; "Home" => "index.md", "Getting Started" => "getting-started.md", "Practical Examples" => "examples.md", + "For Developers" => "for-developers.md", "API Reference" => [ "Basic I/O" => "api/io.md", "Molecular Operations" => "api/operations.md", @@ -33,6 +34,7 @@ makedocs(; "Similarity" => "api/similarity.md", "Standardization" => "api/standardization.md", "Conformers" => "api/conformers.md", + "Alignment" => "api/alignment.md", "Fragmentation" => "api/fragmentation.md", "Graph Operations" => "api/graph.md", "Progress Tracking" => "api/progress.md", diff --git a/docs/src/api/alignment.md b/docs/src/api/alignment.md new file mode 100644 index 0000000..6917189 --- /dev/null +++ b/docs/src/api/alignment.md @@ -0,0 +1,61 @@ +# Molecular Alignment + +## align_mol + +```@docs +align_mol +``` + +## calc_rms + +```@docs +calc_rms +``` + +## `get_best_rms` + +```@docs +get_best_rms +``` + +## `get_alignment_transform` + +```@docs +get_alignment_transform +``` + +## `apply_transform` + +```@docs +apply_transform +``` + +## `random_transform` + +```@docs +random_transform +``` + +## O3AResult + +```@docs +O3AResult +``` + +## `get_o3a` + +```@docs +get_o3a +``` + +## `get_crippen_o3a` + +```@docs +get_crippen_o3a +``` + +## `o3a_align!` + +```@docs +o3a_align! +``` \ No newline at end of file diff --git a/docs/src/for-developers.md b/docs/src/for-developers.md new file mode 100644 index 0000000..1f3803b --- /dev/null +++ b/docs/src/for-developers.md @@ -0,0 +1,130 @@ +# For Developers + +## Architecture Overview + +MoleculeFlow.jl is built as a Julia wrapper around +[RDKit](https://github.com/rdkit/rdkit) cheminformatics library. +Rather than reimplementing complex +cheminformatics algorithms from scratch, we leverage RDKit's mature and +battle-tested implementations while providing a clean, Julia-native +interface, which also allows to build additional functionality on top. + +!!! info + Knowledge of Python RDKit is assumed. + This background knowledge is essential because MoleculeFlow's functions are direct mappings to RDKit functionality. + +## Development Pattern + +### 1. Low-Level Caching Layer + +All RDKit functionality is accessed through caching functions in `src/rdkit.jl`. This file contains functions that: +- Import and cache Python RDKit modules and functions using `@pyconst` +- Provide direct access to RDKit functions +- Handle the Python-Julia interface via PythonCall.jl + +!!! info + The `@pyconst` macro from PythonCall.jl is used to cache Python function calls for better performance. This ensures that the Python import and function lookup only happens once, and subsequent calls reuse the cached function object. + +Example from `rdkit.jl`: +```julia +# RDKit function caching with @pyconst +_mol_from_smiles(smiles::String) = @pyconst(pyimport("rdkit.Chem").MolFromSmiles)(smiles) +_mol_to_smiles(mol::Py) = @pyconst(pyimport("rdkit.Chem").MolToSmiles)(mol) + +function _mol_wt(mol::Py) + @pyconst(pyimport("rdkit.Chem.Descriptors").MolWt)(mol) +end + +function _heavy_atom_count(mol::Py) + @pyconst(pyimport("rdkit.Chem.Descriptors").HeavyAtomCount)(mol) +end +``` + +### 2. Higher-Level Julia Wrappers + +The main API functions are implemented as Julia wrappers around the cached RDKit functions. + +Example wrapper pattern: +```julia +function molecular_weight(mol::Molecule) + !mol.valid && return missing + + try + result = _mol_wt(mol._rdkit_mol) + return pyconvert(Float64, result) + catch + return missing + end +end + +function get_atom(mol::Molecule, idx::Int) + !mol.valid && throw(ArgumentError("Invalid molecule")) + + # Convert indices (Julia 1-based → Python 0-based) + python_idx = idx - 1 + + rdkit_atom = mol._rdkit_mol.GetAtomWithIdx(python_idx) + return Atom(rdkit_atom, mol) +end +``` + +## Critical Index Conversion + +!!! danger + Index Conversion is Critical: Julia uses 1-based indexing while Python uses 0-based indexing. + +### Examples of Index Handling +```julia + +function get_atom(mol::Molecule, idx::Int) + rdkit_mol = mol._rdkit_mol + rdkit_idx = idx - 1 + return get_chem().GetAtomWithIdx(rdkit_mol, rdkit_idx) +end +``` + +## Error Handling Patterns + +### Molecule Validation +Always check molecule validity: +```julia +function some_function(mol::Molecule) + !mol.valid && throw(ArgumentError("Invalid molecule")) + # ... proceed with function +end +``` + +### Graceful Degradation +For operations that may fail, return appropriate fallback values: +```julia +function calculate_descriptor(mol::Molecule) + !mol.valid && return missing + + try + result = rdkit_calculation(mol._rdkit_mol) + return convert_result(result) + catch + return missing # Or appropriate fallback + end +end +``` + +## Contributing New Functions + +When adding new functionality: + +1. **Add RDKit module caching** to `src/rdkit.jl` if needed +2. **Implement the wrapper** in the appropriate module file +3. **Export the function** in `src/MoleculeFlow.jl` +4. **Add comprehensive tests** following existing patterns +5. **Document with examples** in the appropriate API docs file +6. **Handle index conversion** carefully +7. **Follow existing error handling patterns** + +## Best Practices + +1. **Follow existing naming conventions** (snake_case for functions) +2. **Use keyword arguments** for optional parameters +3. **Provide sensible defaults** where possible +4. **Handle missing/invalid inputs gracefully** +5. **Add type annotations** where helpful diff --git a/src/MoleculeFlow.jl b/src/MoleculeFlow.jl index 9d89b1d..4e387b3 100644 --- a/src/MoleculeFlow.jl +++ b/src/MoleculeFlow.jl @@ -187,6 +187,11 @@ export FeatureFactory, ChemicalFeature export create_feature_factory, get_mol_features, pharmacophore_fingerprint export get_feature_families, filter_features_by_family, get_pharmacophore_3d +# Molecular alignment functions +export align_mol, calc_rms, get_best_rms +export get_alignment_transform, random_transform, apply_transform +export O3AResult, get_o3a, get_crippen_o3a, o3a_align! + include("./config.jl") include("./utils.jl") include("./rdkit.jl") @@ -197,6 +202,7 @@ include("./molecule/fingerprints.jl") include("./molecule/similarity.jl") include("./molecule/graph.jl") include("./molecule/conformers.jl") +include("./molecule/alignment.jl") include("./atom/atom.jl") include("./atom/descriptors.jl") include("./bond/bond.jl") diff --git a/src/molecule/alignment.jl b/src/molecule/alignment.jl new file mode 100644 index 0000000..1567ba0 --- /dev/null +++ b/src/molecule/alignment.jl @@ -0,0 +1,819 @@ +####################################################### +# Molecular Alignment Functions +####################################################### +using LinearAlgebra + +""" + align_mol(probe_mol::Molecule, ref_mol::Molecule; + probe_conf_id::Int=-1, ref_conf_id::Int=-1, + atom_map=nothing, weights=nothing, reflect::Bool=false, + max_iterations::Int=50) -> Float64 + +Optimally align a probe molecule to a reference molecule to minimize RMSD. + +# Arguments + + - `probe_mol::Molecule`: The molecule to be aligned (will be modified in place) + - `ref_mol::Molecule`: The reference molecule to align to (remains unchanged) + - `probe_conf_id::Int=-1`: Conformer ID of the probe molecule (-1 for default/first conformer) + - `ref_conf_id::Int=-1`: Conformer ID of the reference molecule (-1 for default/first conformer) + - `atom_map=nothing`: Optional atom mapping as vector of tuples `[(probe_idx, ref_idx), ...]` using 1-based indexing + - `weights=nothing`: Optional vector of weights for atoms during alignment (same order as atoms) + - `reflect::Bool=false`: Whether to allow reflection (improper rotations) during alignment + - `max_iterations::Int=50`: Maximum iterations for iterative alignment algorithms + +# Returns + + - `Float64`: The root-mean-square deviation (RMSD) after alignment, or `Inf` if alignment fails + +# Throws + + - `ArgumentError`: If either molecule is invalid + +# Notes + + - Both molecules must have 3D coordinates (conformers) + - The probe molecule is modified in place - its coordinates are transformed + - Atom mapping uses 1-based Julia indexing (not 0-based Python indexing) + - Returns `Inf` if no valid alignment can be found + +# Examples + +```julia +# Basic alignment +mol1_base = mol_from_smiles("CCO") +mol2_base = mol_from_smiles("CCO") +conformers1 = generate_3d_conformers(mol1_base, 1) +conformers2 = generate_3d_conformers(mol2_base, 1) +mol1 = conformers1[1].molecule +mol2 = conformers2[1].molecule +rmsd = align_mol(mol1, mol2) + +# Alignment with atom mapping (align first 3 atoms) +atom_map = [(1, 1), (2, 2), (3, 3)] +rmsd = align_mol(mol1, mol2; atom_map = atom_map) + +# Alignment allowing reflection +rmsd = align_mol(mol1, mol2; reflect = true) +``` +""" +function align_mol( + probe_mol::Molecule, + ref_mol::Molecule; + probe_conf_id::Int = -1, + ref_conf_id::Int = -1, + atom_map = nothing, + weights = nothing, + reflect::Bool = false, + max_iterations::Int = 50, +) + if !probe_mol.valid || !ref_mol.valid + throw(ArgumentError("Both molecules must be valid")) + end + + try + # Use RDKit's AlignMol function + if atom_map === nothing && weights === nothing + # Simple alignment without atom mapping or weights + rmsd = _align_mol( + probe_mol._rdkit_mol, + ref_mol._rdkit_mol; + prbCid = probe_conf_id, + refCid = ref_conf_id, + reflect = reflect, + maxIters = max_iterations, + ) + else + # Alignment with atom mapping and/or weights + atom_map_py = + atom_map === nothing ? pylist([]) : + pylist([(i - 1, j - 1) for (i, j) in atom_map]) + weights_py = weights === nothing ? pylist([]) : pylist(weights) + + rmsd = _align_mol_with_map( + probe_mol._rdkit_mol, + ref_mol._rdkit_mol, + atom_map_py, + weights_py; + prbCid = probe_conf_id, + refCid = ref_conf_id, + reflect = reflect, + maxIters = max_iterations, + ) + end + + return pyconvert(Float64, rmsd) + catch e + @warn "Error in align_mol: $e" + return Inf + end +end + +""" + calc_rms(probe_mol::Molecule, ref_mol::Molecule; + probe_conf_id::Int=-1, ref_conf_id::Int=-1, + weights=nothing, transform_probe::Bool=false) -> Float64 + +Calculate the root-mean-square distance between two molecules without performing alignment. + +This function computes the RMS distance between corresponding atoms in two molecules. Unlike +`align_mol`, this function does not perform any alignment - it calculates the RMS distance +between the molecules in their current orientations. Optionally, it can consider molecular +symmetry and transform the probe molecule to find the best RMS. + +# Arguments + + - `probe_mol::Molecule`: The probe molecule for comparison + - `ref_mol::Molecule`: The reference molecule for comparison + - `probe_conf_id::Int=-1`: Conformer ID of the probe molecule (-1 for default/first conformer) + - `ref_conf_id::Int=-1`: Conformer ID of the reference molecule (-1 for default/first conformer) + - `weights=nothing`: Optional vector of weights for atoms during calculation + - `transform_probe::Bool=false`: If `true`, optimally transform the probe to minimize RMS (similar to alignment) + +# Returns + + - `Float64`: The root-mean-square distance between molecules in Å, or `Inf` if calculation fails + +# Throws + + - `ArgumentError`: If either molecule is invalid + +# Notes + + - Both molecules must have 3D coordinates (conformers) + - If `transform_probe=false`, calculates RMS in current orientations (no alignment) + - If `transform_probe=true`, finds optimal transformation to minimize RMS + - Can handle molecular symmetry automatically + - For explicit atom mapping, use the `align_mol` function instead + +# Examples + +```julia +# Basic RMS calculation (no alignment) +mol1_base = mol_from_smiles("CCO") +mol2_base = mol_from_smiles("CCO") +conformers1 = generate_3d_conformers(mol1_base, 1) +conformers2 = generate_3d_conformers(mol2_base, 1) +mol1 = conformers1[1].molecule +mol2 = conformers2[1].molecule +rms = calc_rms(mol1, mol2) + +# RMS with optimal transformation +rms_aligned = calc_rms(mol1, mol2; transform_probe = true) + +# RMS with weights +weights = [1.0, 1.0, 2.0] # Give more weight to the third atom +rms_weighted = calc_rms(mol1, mol2; weights = weights) +``` +""" +function calc_rms( + probe_mol::Molecule, + ref_mol::Molecule; + probe_conf_id::Int = -1, + ref_conf_id::Int = -1, + weights = nothing, + transform_probe::Bool = false, +) + if !probe_mol.valid || !ref_mol.valid + throw(ArgumentError("Both molecules must be valid")) + end + + try + if weights === nothing + rms = _calc_rms( + probe_mol._rdkit_mol, + ref_mol._rdkit_mol; + prbCid = probe_conf_id, + refCid = ref_conf_id, + transform = transform_probe, + ) + else + weights_py = pylist(weights) + rms = _calc_rms_with_weights( + probe_mol._rdkit_mol, + ref_mol._rdkit_mol, + weights_py; + prbCid = probe_conf_id, + refCid = ref_conf_id, + transform = transform_probe, + ) + end + + return pyconvert(Float64, rms) + catch e + @warn "Error in calc_rms: $e" + return Inf + end +end + +""" + get_best_rms(probe_mol::Molecule, ref_mol::Molecule; + probe_conf_id::Int=-1, ref_conf_id::Int=-1, + weights=nothing, max_matches::Int=1000000) -> Float64 + +Calculate the best possible RMS between two molecules considering molecular symmetry. + +This function finds the optimal RMS distance by considering all possible symmetry-equivalent +alignments between the two molecules. It explores different atom mappings that arise from +molecular symmetry to find the minimum possible RMS distance. This is particularly useful +for symmetric molecules like benzene where multiple equivalent alignments are possible. + +# Arguments + + - `probe_mol::Molecule`: The probe molecule for comparison + - `ref_mol::Molecule`: The reference molecule for comparison + - `probe_conf_id::Int=-1`: Conformer ID of the probe molecule (-1 for default/first conformer) + - `ref_conf_id::Int=-1`: Conformer ID of the reference molecule (-1 for default/first conformer) + - `weights=nothing`: Optional vector of weights for atoms during calculation + - `max_matches::Int=1000000`: Maximum number of symmetry-equivalent matches to consider + +# Returns + + - `Float64`: The minimum RMS distance considering all symmetry-equivalent alignments in Å, or `Inf` if calculation fails + +# Throws + + - `ArgumentError`: If either molecule is invalid + +# Notes + + - Both molecules must have 3D coordinates (conformers) + - This function is computationally more expensive than `calc_rms` as it explores multiple alignments + - Particularly useful for symmetric molecules (e.g., benzene, cyclohexane) + - The probe molecule is transformed to find the best possible alignment + - May take longer for highly symmetric molecules + - For explicit atom mapping, use the `align_mol` function instead + +# Examples + +```julia +# Best RMS for symmetric molecules (benzene) +mol1_base = mol_from_smiles("c1ccccc1") # benzene +mol2_base = mol_from_smiles("c1ccccc1") # benzene +conformers1 = generate_3d_conformers(mol1_base, 1) +conformers2 = generate_3d_conformers(mol2_base, 1) +mol1 = conformers1[1].molecule +mol2 = conformers2[1].molecule + +# Regular RMS (may not be optimal due to symmetry) +regular_rms = calc_rms(mol1, mol2) + +# Best RMS considering symmetry +best_rms = get_best_rms(mol1, mol2) +# best_rms <= regular_rms due to symmetry considerations + +# Limit the number of symmetry matches to consider +best_rms_limited = get_best_rms(mol1, mol2; max_matches = 100) +``` +""" +function get_best_rms( + probe_mol::Molecule, + ref_mol::Molecule; + probe_conf_id::Int = -1, + ref_conf_id::Int = -1, + weights = nothing, + max_matches::Int = 1000000, +) + if !probe_mol.valid || !ref_mol.valid + throw(ArgumentError("Both molecules must be valid")) + end + + try + if weights === nothing + rms = _get_best_rms( + probe_mol._rdkit_mol, + ref_mol._rdkit_mol; + prbCid = probe_conf_id, + refCid = ref_conf_id, + maxMatches = max_matches, + ) + else + weights_py = pylist(weights) + rms = _get_best_rms_with_weights( + probe_mol._rdkit_mol, + ref_mol._rdkit_mol, + weights_py; + prbCid = probe_conf_id, + refCid = ref_conf_id, + maxMatches = max_matches, + ) + end + + return pyconvert(Float64, rms) + catch e + @warn "Error in get_best_rms: $e" + return Inf + end +end + +""" + get_alignment_transform(probe_mol::Molecule, ref_mol::Molecule; + probe_conf_id::Int=-1, ref_conf_id::Int=-1, + weights=nothing, reflect::Bool=false) -> Matrix{Float64} + +Compute the transformation matrix required to align a probe molecule to a reference molecule. + +# Arguments + + - `probe_mol::Molecule`: The molecule to be aligned + - `ref_mol::Molecule`: The reference molecule + - `probe_conf_id::Int=-1`: Conformer ID of the probe molecule (-1 for default) + - `ref_conf_id::Int=-1`: Conformer ID of the reference molecule (-1 for default) + - `weights=nothing`: Optional weights for atoms during alignment + - `reflect::Bool=false`: Whether to allow reflection during alignment + +# Returns + + - `Matrix{Float64}`: 4x4 transformation matrix for the alignment + +# Notes + + - For explicit atom mapping, use the `align_mol` function instead + - The transformation matrix can be applied using `apply_transform` + +# Example + +```julia +mol1_base = mol_from_smiles("CCO") +mol2_base = mol_from_smiles("CCO") +conformers1 = generate_3d_conformers(mol1_base, 1) +conformers2 = generate_3d_conformers(mol2_base, 1) +mol1 = conformers1[1].molecule +mol2 = conformers2[1].molecule +transform = get_alignment_transform(mol1, mol2) +``` +""" +function get_alignment_transform( + probe_mol::Molecule, + ref_mol::Molecule; + probe_conf_id::Int = -1, + ref_conf_id::Int = -1, + weights = nothing, + reflect::Bool = false, +) + if !probe_mol.valid || !ref_mol.valid + throw(ArgumentError("Both molecules must be valid")) + end + + try + if weights === nothing + transform_py = _get_alignment_transform( + probe_mol._rdkit_mol, + ref_mol._rdkit_mol; + prbCid = probe_conf_id, + refCid = ref_conf_id, + reflect = reflect, + ) + else + weights_py = pylist(weights) + transform_py = _get_alignment_transform_with_weights( + probe_mol._rdkit_mol, + ref_mol._rdkit_mol, + weights_py; + prbCid = probe_conf_id, + refCid = ref_conf_id, + reflect = reflect, + ) + end + + # RDKit returns a tuple (RMSD, transform_matrix) + # Extract just the transformation matrix (second element) + transform_matrix = transform_py[1] # Python 0-based indexing, so [1] is second element + transform_array = pyconvert(Array, transform_matrix) + return reshape(transform_array, 4, 4) + catch e + @warn "Error in get_alignment_transform: $e" + return Matrix{Float64}(I, 4, 4) # Return identity matrix on error + end +end + +""" + random_transform(mol::Molecule; conf_id::Int=-1, + seed::Union{Int,Nothing}=nothing) -> Molecule + +Perform a random rigid body transformation (rotation + translation) on a molecule. + +# Arguments + + - `mol::Molecule`: The molecule to transform + - `conf_id::Int=-1`: Conformer ID to transform (-1 for default) + - `seed::Union{Int,Nothing}=nothing`: Random seed for reproducibility + +# Returns + + - `Molecule`: The transformed molecule (original molecule is also modified) + +# Example + +```julia +mol_base = mol_from_smiles("CCO") +conformers = generate_3d_conformers(mol_base, 1) +mol = conformers[1].molecule +transformed_mol = random_transform(mol; seed = 42) +``` +""" +function random_transform( + mol::Molecule; conf_id::Int = -1, seed::Union{Int, Nothing} = nothing +) + if !mol.valid + throw(ArgumentError("Molecule must be valid")) + end + + try + # Pass seed directly to _random_transform (RDKit's RandomTransform accepts seed parameter) + seed_value = seed === nothing ? -1 : seed + _random_transform(mol._rdkit_mol; confId = conf_id, seed = seed_value) + + return mol + catch e + @warn "Error in random_transform: $e" + return mol + end +end + +""" + apply_transform(mol::Molecule, transform::Matrix{Float64}; conf_id::Int=-1) -> Molecule + +Apply a transformation matrix to a molecule's coordinates. + +# Arguments + + - `mol::Molecule`: The molecule to transform + - `transform::Matrix{Float64}`: 4x4 transformation matrix + - `conf_id::Int=-1`: Conformer ID to transform (-1 for default) + +# Returns + + - `Molecule`: The transformed molecule (original molecule is also modified) + +# Example + +```julia +mol_base = mol_from_smiles("CCO") +conformers = generate_3d_conformers(mol_base, 1) +mol = conformers[1].molecule +# Get identity transformation as example +transform = Matrix{Float64}(I, 4, 4) +transformed_mol = apply_transform(mol, transform) +``` +""" +function apply_transform(mol::Molecule, transform::Matrix{Float64}; conf_id::Int = -1) + if !mol.valid + throw(ArgumentError("Molecule must be valid")) + end + + if size(transform) != (4, 4) + throw(ArgumentError("Transform matrix must be 4x4")) + end + + try + # Get the specific conformer + conf = mol._rdkit_mol.GetConformer(conf_id) + + # Convert Julia matrix to numpy array + numpy = pyimport("numpy") + transform_matrix = numpy.array(transform; dtype = numpy.float64) + + # Apply the transformation directly + rdMolTransforms = pyimport("rdkit.Chem.rdMolTransforms") + rdMolTransforms.TransformConformer(conf, transform_matrix) + + return mol + catch e + @warn "Error in apply_transform: $e" + return mol + end +end + +""" + O3AResult + +Structure to hold results from O3A (Open3DAlign) alignment operations. + +# Fields + + - `score::Float64`: O3A alignment score (higher values indicate better alignment) + - `rmsd::Float64`: Root Mean Square Deviation after alignment in Å + - `transform::Matrix{Float64}`: 4x4 transformation matrix applied to the probe molecule + - `matched_atoms::Vector{Tuple{Int,Int}}`: Pairs of matched atom indices using 1-based indexing (probe_idx, ref_idx) + +# Notes + + - Failed alignments return `score = -1.0` and `rmsd = Inf` + - The transformation matrix is in homogeneous coordinates format + - Atom indices in `matched_atoms` use Julia's 1-based indexing convention +""" +struct O3AResult + score::Float64 + rmsd::Float64 + transform::Matrix{Float64} + matched_atoms::Vector{Tuple{Int, Int}} +end + +function Base.show(io::IO, result::O3AResult) + print( + io, + "O3AResult(score=$(round(result.score, digits=3)), rmsd=$(round(result.rmsd, digits=3)), $(length(result.matched_atoms)) matched atoms)", + ) +end + +""" + get_o3a(probe_mol::Molecule, ref_mol::Molecule; + probe_conf_id::Int=-1, ref_conf_id::Int=-1, + reflect::Bool=false, accuracy::Float64=0.0001, + attempt_generic_features::Bool=true, + prune_conformers::Bool=true) -> O3AResult + +Perform Open3DAlign (O3A) alignment using MMFF atom types for molecular overlay. +This method aligns molecules based on their 3D pharmacophoric features using MMFF +molecular properties to define feature points. + +# Arguments + + - `probe_mol::Molecule`: The molecule to be aligned (will be modified in place) + - `ref_mol::Molecule`: The reference molecule to align to (remains unchanged) + - `probe_conf_id::Int=-1`: Conformer ID of the probe molecule (-1 for default/first conformer) + - `ref_conf_id::Int=-1`: Conformer ID of the reference molecule (-1 for default/first conformer) + - `reflect::Bool=false`: Whether to allow reflection during alignment + - `accuracy::Float64=0.0001`: Accuracy threshold for feature matching (currently not used) + - `attempt_generic_features::Bool=true`: Whether to use generic pharmacophoric features (currently not used) + - `prune_conformers::Bool=true`: Whether to prune conformers during alignment (currently not used) + +# Returns + + - `O3AResult`: Structure containing: + + + `score::Float64`: O3A alignment score (higher is better) + + `rmsd::Float64`: RMSD after alignment in Å + + `transform::Matrix{Float64}`: 4x4 transformation matrix applied to probe + + `matched_atoms::Vector{Tuple{Int,Int}}`: Pairs of matched atom indices (probe, reference) + +# Notes + + - Both molecules must have 3D coordinates (conformers) + - The probe molecule is modified in place during alignment + - MMFF molecular properties are automatically computed for feature generation + - Returns failed result (score=-1.0, rmsd=Inf) if alignment fails + +# Example + +```julia +mol1_base = mol_from_smiles("c1ccc(cc1)CCN") # phenethylamine +mol2_base = mol_from_smiles("c1ccc(cc1)CCNC") # N-methylphenethylamine +conformers1 = generate_3d_conformers(mol1_base, 1) +conformers2 = generate_3d_conformers(mol2_base, 1) +mol1 = conformers1[1].molecule +mol2 = conformers2[1].molecule +result = get_o3a(mol1, mol2) +``` +""" +function get_o3a( + probe_mol::Molecule, + ref_mol::Molecule; + probe_conf_id::Int = -1, + ref_conf_id::Int = -1, + reflect::Bool = false, + accuracy::Float64 = 0.0001, + attempt_generic_features::Bool = true, + prune_conformers::Bool = true, +) + if !probe_mol.valid || !ref_mol.valid + throw(ArgumentError("Both molecules must be valid")) + end + + try + # Get O3A alignment object + o3a = _get_o3a( + probe_mol._rdkit_mol, + ref_mol._rdkit_mol; + prbCid = probe_conf_id, + refCid = ref_conf_id, + reflect = reflect, + accuracy = accuracy, + attemptGenericFeatures = attempt_generic_features, + pruneConfs = prune_conformers, + ) + + # Perform alignment and get RMSD + rmsd = pyconvert(Float64, o3a.Align()) + + # Get score + score = pyconvert(Float64, o3a.Score()) + + # Get transformation matrix + transform_py = o3a.Trans() + # Trans() returns a tuple (RMSD, transform_matrix), get the matrix (second element) + transform_matrix = transform_py[1] # Python 0-based indexing, so [1] is second element + transform_array = pyconvert(Array, transform_matrix) + transform = transform_array # Already a 4x4 matrix, no reshape needed + + # Get matched atoms + matches = o3a.Matches() + matched_atoms = Tuple{Int, Int}[] + for match in matches + probe_idx = pyconvert(Int, match[0]) + 1 # Convert to 1-based indexing + ref_idx = pyconvert(Int, match[1]) + 1 + push!(matched_atoms, (probe_idx, ref_idx)) + end + + return O3AResult(score, rmsd, transform, matched_atoms) + catch e + @warn "Error in get_o3a: $e" + return O3AResult(-1.0, Inf, Matrix{Float64}(I, 4, 4), Tuple{Int, Int}[]) + end +end + +""" + get_crippen_o3a(probe_mol::Molecule, ref_mol::Molecule; + probe_conf_id::Int=-1, ref_conf_id::Int=-1, + reflect::Bool=false, accuracy::Float64=0.0001, + attempt_generic_features::Bool=true, + prune_conformers::Bool=true) -> O3AResult + +Perform Open3DAlign (O3A) alignment using Crippen atom contributions for molecular overlay. +This method uses Crippen LogP and molar refractivity contributions to define feature points +for pharmacophore-based alignment. + +# Arguments + + - `probe_mol::Molecule`: The molecule to be aligned (will be modified in place) + - `ref_mol::Molecule`: The reference molecule to align to (remains unchanged) + - `probe_conf_id::Int=-1`: Conformer ID of the probe molecule (-1 for default/first conformer) + - `ref_conf_id::Int=-1`: Conformer ID of the reference molecule (-1 for default/first conformer) + - `reflect::Bool=false`: Whether to allow reflection during alignment + - `accuracy::Float64=0.0001`: Accuracy threshold for feature matching (currently not used) + - `attempt_generic_features::Bool=true`: Whether to use generic pharmacophoric features (currently not used) + - `prune_conformers::Bool=true`: Whether to prune conformers during alignment (currently not used) + +# Returns + + - `O3AResult`: Structure containing: + + + `score::Float64`: O3A alignment score (higher is better) + + `rmsd::Float64`: RMSD after alignment in Å + + `transform::Matrix{Float64}`: 4x4 transformation matrix applied to probe + + `matched_atoms::Vector{Tuple{Int,Int}}`: Pairs of matched atom indices (probe, reference) + +# Notes + + - Both molecules must have 3D coordinates (conformers) + - The probe molecule is modified in place during alignment + - Crippen contributions (LogP and molar refractivity) are automatically computed for feature generation + - Returns failed result (score=-1.0, rmsd=Inf) if alignment fails + - Generally more robust than MMFF-based alignment for diverse molecule types + +# Example + +```julia +mol1_base = mol_from_smiles("CCc1ccccc1") # ethylbenzene +mol2_base = mol_from_smiles("CCc1ccc(O)cc1") # 4-ethylphenol +conformers1 = generate_3d_conformers(mol1_base, 1) +conformers2 = generate_3d_conformers(mol2_base, 1) +mol1 = conformers1[1].molecule +mol2 = conformers2[1].molecule +result = get_crippen_o3a(mol1, mol2) +``` +""" +function get_crippen_o3a( + probe_mol::Molecule, + ref_mol::Molecule; + probe_conf_id::Int = -1, + ref_conf_id::Int = -1, + reflect::Bool = false, + accuracy::Float64 = 0.0001, + attempt_generic_features::Bool = true, + prune_conformers::Bool = true, +) + if !probe_mol.valid || !ref_mol.valid + throw(ArgumentError("Both molecules must be valid")) + end + + try + # Get Crippen O3A alignment object + o3a = _get_crippen_o3a( + probe_mol._rdkit_mol, + ref_mol._rdkit_mol; + prbCid = probe_conf_id, + refCid = ref_conf_id, + reflect = reflect, + accuracy = accuracy, + attemptGenericFeatures = attempt_generic_features, + pruneConfs = prune_conformers, + ) + + # Perform alignment and get RMSD + rmsd = pyconvert(Float64, o3a.Align()) + + # Get score + score = pyconvert(Float64, o3a.Score()) + + # Get transformation matrix + transform_py = o3a.Trans() + # Trans() returns a tuple (RMSD, transform_matrix), get the matrix (second element) + transform_matrix = transform_py[1] # Python 0-based indexing, so [1] is second element + transform_array = pyconvert(Array, transform_matrix) + transform = transform_array # Already a 4x4 matrix, no reshape needed + + # Get matched atoms + matches = o3a.Matches() + matched_atoms = Tuple{Int, Int}[] + for match in matches + probe_idx = pyconvert(Int, match[0]) + 1 # Convert to 1-based indexing + ref_idx = pyconvert(Int, match[1]) + 1 + push!(matched_atoms, (probe_idx, ref_idx)) + end + + return O3AResult(score, rmsd, transform, matched_atoms) + catch e + @warn "Error in get_crippen_o3a: $e" + return O3AResult(-1.0, Inf, Matrix{Float64}(I, 4, 4), Tuple{Int, Int}[]) + end +end + +""" + o3a_align!(probe_mol::Molecule, ref_mol::Molecule, alignment_method::Symbol=:mmff; + probe_conf_id::Int=-1, ref_conf_id::Int=-1, + reflect::Bool=false, accuracy::Float64=0.0001) -> O3AResult + +Convenience function to perform O3A (Open3DAlign) alignment and modify the probe molecule in place. +This function provides a simple interface to choose between MMFF-based or Crippen-based alignment. + +# Arguments + + - `probe_mol::Molecule`: The molecule to be aligned (will be modified in place) + - `ref_mol::Molecule`: The reference molecule to align to (remains unchanged) + - `alignment_method::Symbol=:mmff`: Alignment method (:mmff or :crippen) + + + `:mmff`: Uses MMFF molecular properties for feature generation (calls `get_o3a`) + + `:crippen`: Uses Crippen LogP/molar refractivity contributions (calls `get_crippen_o3a`) + - `probe_conf_id::Int=-1`: Conformer ID of the probe molecule (-1 for default/first conformer) + - `ref_conf_id::Int=-1`: Conformer ID of the reference molecule (-1 for default/first conformer) + - `reflect::Bool=false`: Whether to allow reflection during alignment + - `accuracy::Float64=0.0001`: Accuracy threshold for feature matching (currently not used) + +# Returns + + - `O3AResult`: Structure containing: + + + `score::Float64`: O3A alignment score (higher is better) + + `rmsd::Float64`: RMSD after alignment in Å + + `transform::Matrix{Float64}`: 4x4 transformation matrix applied to probe + + `matched_atoms::Vector{Tuple{Int,Int}}`: Pairs of matched atom indices (probe, reference) + +# Notes + + - Both molecules must have 3D coordinates (conformers) + - The probe molecule is modified in place during alignment + - Returns failed result (score=-1.0, rmsd=Inf) if alignment fails + - For most cases, `:crippen` method is more robust than `:mmff` + +# Example + +```julia +mol1_base = mol_from_smiles("c1ccccc1CCN") +mol2_base = mol_from_smiles("c1ccccc1CCNC") +conformers1 = generate_3d_conformers(mol1_base, 1) +conformers2 = generate_3d_conformers(mol2_base, 1) +mol1 = conformers1[1].molecule +mol2 = conformers2[1].molecule + +# MMFF-based alignment +result1 = o3a_align!(mol1, mol2, :mmff) +println("MMFF result: \$(result1)") + +# Crippen-based alignment (generally more robust) +result2 = o3a_align!(mol1, mol2, :crippen) +println("Crippen result: \$(result2)") +``` +""" +function o3a_align!( + probe_mol::Molecule, + ref_mol::Molecule, + alignment_method::Symbol = :mmff; + probe_conf_id::Int = -1, + ref_conf_id::Int = -1, + reflect::Bool = false, + accuracy::Float64 = 0.0001, +) + if alignment_method == :mmff + return get_o3a( + probe_mol, + ref_mol; + probe_conf_id = probe_conf_id, + ref_conf_id = ref_conf_id, + reflect = reflect, + accuracy = accuracy, + ) + elseif alignment_method == :crippen + return get_crippen_o3a( + probe_mol, + ref_mol; + probe_conf_id = probe_conf_id, + ref_conf_id = ref_conf_id, + reflect = reflect, + accuracy = accuracy, + ) + else + throw(ArgumentError("alignment_method must be :mmff or :crippen")) + end +end diff --git a/src/rdkit.jl b/src/rdkit.jl index 73c6472..f29b28e 100644 --- a/src/rdkit.jl +++ b/src/rdkit.jl @@ -488,7 +488,9 @@ function _get_hashed_atom_pair_fingerprint_as_bit_vect(mol::Py; nBits::Int) end function _get_hashed_topological_torsion_fingerprint_as_bit_vect(mol::Py; nBits::Int) @pyconst( - pyimport("rdkit.Chem.rdMolDescriptors").GetHashedTopologicalTorsionFingerprintAsBitVect + pyimport( + "rdkit.Chem.rdMolDescriptors" + ).GetHashedTopologicalTorsionFingerprintAsBitVect )( mol; nBits = nBits ) @@ -882,7 +884,7 @@ end function _set_atom_mapping_numbers(mol::Py, map_nums::Vector{Int}) for (i, map_num) in enumerate(map_nums) - atom = mol.GetAtomWithIdx(i-1) + atom = mol.GetAtomWithIdx(i - 1) atom.SetAtomMapNum(map_num) end end @@ -965,3 +967,231 @@ function _explicit_pharmacophore_from_mol(mol::Py, feature_factory::Py; conf_id: return feature_list end + +# Molecular alignment functions (rdMolAlign module) +function _align_mol( + probe_mol::Py, + ref_mol::Py; + prbCid::Int = -1, + refCid::Int = -1, + reflect::Bool = false, + maxIters::Int = 50, +) + @pyconst(pyimport("rdkit.Chem.rdMolAlign").AlignMol)( + probe_mol, + ref_mol; + prbCid = prbCid, + refCid = refCid, + reflect = reflect, + maxIters = maxIters, + ) +end + +function _align_mol_with_map( + probe_mol::Py, + ref_mol::Py, + atom_map::Py, + weights::Py; + prbCid::Int = -1, + refCid::Int = -1, + reflect::Bool = false, + maxIters::Int = 50, +) + @pyconst(pyimport("rdkit.Chem.rdMolAlign").AlignMol)( + probe_mol, + ref_mol; + prbCid = prbCid, + refCid = refCid, + atomMap = atom_map, + weights = weights, + reflect = reflect, + maxIters = maxIters, + ) +end + +function _calc_rms( + probe_mol::Py, ref_mol::Py; prbCid::Int = -1, refCid::Int = -1, transform::Bool = false +) + if transform + # Use GetBestRMS when transformation is requested + @pyconst(pyimport("rdkit.Chem.rdMolAlign").GetBestRMS)( + probe_mol, ref_mol, prbCid, refCid, pylist([]), 1000000, true, pylist([]) + ) + else + # Use CalcRMS for static RMS calculation + @pyconst(pyimport("rdkit.Chem.rdMolAlign").CalcRMS)( + probe_mol, ref_mol, prbCid, refCid, pylist([]), 1000000, true, pylist([]) + ) + end +end + +function _calc_rms_with_weights( + probe_mol::Py, + ref_mol::Py, + weights::Py; + prbCid::Int = -1, + refCid::Int = -1, + transform::Bool = false, +) + if transform + # Use GetBestRMS when transformation is requested + @pyconst(pyimport("rdkit.Chem.rdMolAlign").GetBestRMS)( + probe_mol, ref_mol, prbCid, refCid, pylist([]), 1000000, true, weights + ) + else + # Use CalcRMS for static RMS calculation + @pyconst(pyimport("rdkit.Chem.rdMolAlign").CalcRMS)( + probe_mol, ref_mol, prbCid, refCid, pylist([]), 1000000, true, weights + ) + end +end + +function _get_best_rms( + probe_mol::Py, + ref_mol::Py; + prbCid::Int = -1, + refCid::Int = -1, + maxMatches::Int = 1000000, +) + @pyconst(pyimport("rdkit.Chem.rdMolAlign").GetBestRMS)( + probe_mol, ref_mol, prbCid, refCid, pylist([]), maxMatches, true, pylist([]) + ) +end + +function _get_best_rms_with_weights( + probe_mol::Py, + ref_mol::Py, + weights::Py; + prbCid::Int = -1, + refCid::Int = -1, + maxMatches::Int = 1000000, +) + @pyconst(pyimport("rdkit.Chem.rdMolAlign").GetBestRMS)( + probe_mol, ref_mol, prbCid, refCid, pylist([]), maxMatches, true, weights + ) +end + +function _get_alignment_transform( + probe_mol::Py, ref_mol::Py; prbCid::Int = -1, refCid::Int = -1, reflect::Bool = false +) + @pyconst(pyimport("rdkit.Chem.rdMolAlign").GetAlignmentTransform)( + probe_mol, ref_mol; prbCid = prbCid, refCid = refCid, reflect = reflect + ) +end + +function _get_alignment_transform_with_weights( + probe_mol::Py, + ref_mol::Py, + weights::Py; + prbCid::Int = -1, + refCid::Int = -1, + reflect::Bool = false, +) + @pyconst(pyimport("rdkit.Chem.rdMolAlign").GetAlignmentTransform)( + probe_mol, + ref_mol; + prbCid = prbCid, + refCid = refCid, + atomMap = pylist([]), + weights = weights, + reflect = reflect, + ) +end + +function _random_transform(mol::Py; confId::Int = -1, seed::Int = -1) + @pyconst(pyimport("rdkit.Chem.rdMolAlign").RandomTransform)(mol, confId, seed) +end + +function _transform_conformer(mol::Py, transform::Vector; confId::Int = -1) + # Get the specific conformer + conf = mol.GetConformer(confId) + + # Convert the transform to a numpy array (4x4 matrix) + numpy = pyimport("numpy") + transform_matrix = numpy.array(reshape(transform, 4, 4); dtype = numpy.float64) + + # Apply the transformation + @pyconst(pyimport("rdkit.Chem.rdMolTransforms").TransformConformer)( + conf, transform_matrix + ) +end + +# Remove _set_random_seed function - not needed since RandomTransform accepts seed parameter directly + +# O3A (Open3DAlign) functions +function _get_o3a( + probe_mol::Py, + ref_mol::Py; + prbCid::Int = -1, + refCid::Int = -1, + reflect::Bool = false, + accuracy::Float64 = 0.0001, + attemptGenericFeatures::Bool = true, + pruneConfs::Bool = true, +) + # Get MMFF properties for both molecules (required parameters) + rdMolDescriptors = pyimport("rdkit.Chem.rdMolDescriptors") + pybuiltins = pyimport("builtins") + + # Try to get MMFF properties, use None if they can't be generated + prb_props = pybuiltins.None + ref_props = pybuiltins.None + try + prb_props = rdMolDescriptors.MMFFGetMoleculeProperties(probe_mol) + ref_props = rdMolDescriptors.MMFFGetMoleculeProperties(ref_mol) + catch + # Keep the None values assigned above + end + + @pyconst(pyimport("rdkit.Chem.rdMolAlign").GetO3A)( + probe_mol, + ref_mol, + prb_props, + ref_props, + prbCid, + refCid, + reflect, + 50, + 0, + pylist([]), + pylist([]), + ) +end + +function _get_crippen_o3a( + probe_mol::Py, + ref_mol::Py; + prbCid::Int = -1, + refCid::Int = -1, + reflect::Bool = false, + accuracy::Float64 = 0.0001, + attemptGenericFeatures::Bool = true, + pruneConfs::Bool = true, +) + # Get Crippen contributions for both molecules (required parameters) + rdMolDescriptors = pyimport("rdkit.Chem.rdMolDescriptors") + + # Try to get Crippen contributions, use empty lists if they can't be generated + prb_contribs = pylist([]) + ref_contribs = pylist([]) + try + prb_contribs = rdMolDescriptors.getCrippenAtomContribs(probe_mol) + ref_contribs = rdMolDescriptors.getCrippenAtomContribs(ref_mol) + catch + # Keep the empty lists assigned above + end + + @pyconst(pyimport("rdkit.Chem.rdMolAlign").GetCrippenO3A)( + probe_mol, + ref_mol, + prb_contribs, + ref_contribs, + prbCid, + refCid, + reflect, + 50, + 0, + pylist([]), + pylist([]), + ) +end diff --git a/test/runtests.jl b/test/runtests.jl index 2f8cfab..19fa109 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -18,6 +18,7 @@ using Graphs include("test_drawing.jl") include("test_standardization.jl") include("test_conformers.jl") + include("test_alignment.jl") include("test_error_handling.jl") include("test_reaction.jl") include("test_aqua.jl") diff --git a/test/test_alignment.jl b/test/test_alignment.jl new file mode 100644 index 0000000..28f9875 --- /dev/null +++ b/test/test_alignment.jl @@ -0,0 +1,404 @@ +using Test +using MoleculeFlow +using LinearAlgebra + +@testset "Molecular Alignment Functions" begin + # Helper function to create molecules with 3D conformers + function create_mol_with_3d(smiles::String; optimize::Bool = true) + mol_base = mol_from_smiles(smiles) + @test mol_base.valid # Fail the test if molecule creation fails + conformers = generate_3d_conformers(mol_base, 1; optimize = optimize) + @test !isempty(conformers) # Fail the test if conformer generation fails + return conformers[1].molecule + end + @testset "Basic Alignment Functions" begin + @testset "align_mol function" begin + # Test basic alignment as in docstring + mol1 = create_mol_with_3d("CCO") # ethanol + mol2 = create_mol_with_3d("CCO") # ethanol + + # Basic alignment should return a finite RMSD + rmsd = align_mol(mol1, mol2) + @test rmsd isa Float64 + @test rmsd >= 0.0 + @test isfinite(rmsd) + + # Test with different molecules + mol3 = create_mol_with_3d("CCC") # propane + rmsd2 = align_mol(mol1, mol3) + @test rmsd2 isa Float64 + @test rmsd2 >= 0.0 + @test isfinite(rmsd2) || rmsd2 == Inf # May be Inf if no good alignment + + # Test with weights instead of atom mapping + weights = [1.0, 2.0, 1.0] # Give more weight to second atom (ethanol: C-C-O) + rmsd3 = align_mol(mol1, mol2; weights = weights) + @test rmsd3 isa Float64 + @test rmsd3 >= 0.0 + @test isfinite(rmsd3) + + # Test with reflection + rmsd4 = align_mol(mol1, mol2; reflect = true) + @test rmsd4 isa Float64 + @test rmsd4 >= 0.0 + @test isfinite(rmsd4) + end + + @testset "calc_rms function" begin + # Test RMS calculation as in docstring + mol1 = create_mol_with_3d("CCO") + mol2 = create_mol_with_3d("CCO") + + # Calculate RMS without transformation + rms = calc_rms(mol1, mol2) + @test rms isa Float64 + @test rms >= 0.0 + @test isfinite(rms) + + # Calculate RMS with transformation + rms_transform = calc_rms(mol1, mol2; transform_probe = true) + @test rms_transform isa Float64 + @test rms_transform >= 0.0 + @test isfinite(rms_transform) + @test rms_transform <= rms # Transformed should be better or equal + + # Test with weights (ethanol has 3 atoms: C-C-O) + weights = [1.0, 1.0, 2.0] # Give more weight to the oxygen (3rd atom) + rms_weighted = calc_rms(mol1, mol2; weights = weights) + @test rms_weighted isa Float64 + @test rms_weighted >= 0.0 + @test isfinite(rms_weighted) + end + + @testset "get_best_rms function" begin + # Test optimal RMS as in docstring (benzene has symmetry) + mol1 = create_mol_with_3d("c1ccccc1") # benzene + mol2 = create_mol_with_3d("c1ccccc1") # benzene + + best_rms = get_best_rms(mol1, mol2) + @test best_rms isa Float64 + @test best_rms >= 0.0 + @test isfinite(best_rms) + + # Regular RMS should be >= best RMS due to symmetry consideration + regular_rms = calc_rms(mol1, mol2) + @test regular_rms >= best_rms || abs(regular_rms - best_rms) < 1e-6 + + # Test with weights (benzene has 6 atoms: 6 carbons, no explicit hydrogens) + # Create reasonable weights for all atoms + weights = ones(6) # Equal weights for all 6 carbon atoms + weights[1] = 2.0 # More weight on first carbon + best_rms_weighted = get_best_rms(mol1, mol2; weights = weights) + @test best_rms_weighted isa Float64 + @test best_rms_weighted >= 0.0 + @test isfinite(best_rms_weighted) + end + + @testset "get_alignment_transform function" begin + # Test transformation matrix computation + mol1 = create_mol_with_3d("CCO") + mol2 = create_mol_with_3d("CCO") + + transform = get_alignment_transform(mol1, mol2) + @test transform isa Matrix{Float64} + @test size(transform) == (4, 4) + @test all(isfinite, transform) + + # Test with reflection + transform_reflect = get_alignment_transform(mol1, mol2; reflect = true) + @test transform_reflect isa Matrix{Float64} + @test size(transform_reflect) == (4, 4) + @test all(isfinite, transform_reflect) + + # Test with weights (ethanol has 3 atoms: 2 carbons, 1 oxygen, no explicit hydrogens) + weights = ones(3) # Equal weights for all 3 atoms + weights[3] = 2.0 # More weight on oxygen atom (3rd atom) + transform_weighted = get_alignment_transform(mol1, mol2; weights = weights) + @test transform_weighted isa Matrix{Float64} + @test size(transform_weighted) == (4, 4) + @test all(isfinite, transform_weighted) + end + + @testset "random_transform function" begin + # Test random transformation + mol = create_mol_with_3d("CCO") + + # Apply random transformation + transformed_mol = random_transform(mol; seed = 42) + @test transformed_mol === mol # Should return the same object + @test transformed_mol.valid + + # Test with different seed + mol2 = create_mol_with_3d("CCO") + transformed_mol2 = random_transform(mol2; seed = 123) + @test transformed_mol2.valid + end + + @testset "apply_transform function" begin + # Test applying transformation matrix + mol = create_mol_with_3d("CCO") + + # Apply identity transformation (should not change coordinates significantly) + identity_transform = Matrix{Float64}(I, 4, 4) + transformed_mol = apply_transform(mol, identity_transform) + @test transformed_mol === mol # Should return the same object + @test transformed_mol.valid + + # Test with a translation matrix + translation_transform = Matrix{Float64}(I, 4, 4) + translation_transform[1:3, 4] = [1.0, 2.0, 3.0] # Translate by (1, 2, 3) + transformed_mol2 = apply_transform(mol, translation_transform) + @test transformed_mol2.valid + end + end + + @testset "O3A Alignment Functions" begin + @testset "O3AResult structure" begin + # Test O3AResult structure + result = O3AResult(0.85, 1.23, Matrix{Float64}(I, 4, 4), [(1, 1), (2, 2)]) + @test result.score == 0.85 + @test result.rmsd == 1.23 + @test size(result.transform) == (4, 4) + @test length(result.matched_atoms) == 2 + + # Test string representation + result_str = string(result) + @test occursin("O3AResult", result_str) + @test occursin("score", result_str) + @test occursin("rmsd", result_str) + end + + @testset "get_o3a function" begin + # Test MMFF-based O3A alignment with similar drug-like molecules + mol1 = create_mol_with_3d("CCc1ccccc1") # ethylbenzene + mol2 = create_mol_with_3d("CCc1ccc(O)cc1") # 4-ethylphenol (similar structure) + + result = get_o3a(mol1, mol2) + @test result isa O3AResult + @test result.score isa Float64 + @test result.rmsd isa Float64 + @test result.rmsd >= 0.0 || result.rmsd == Inf + @test size(result.transform) == (4, 4) + @test result.matched_atoms isa Vector{Tuple{Int, Int}} + @test all(isfinite, result.transform) || + result.transform == Matrix{Float64}(I, 4, 4) + + # Test with different parameters + result2 = get_o3a(mol1, mol2; reflect = true, accuracy = 0.001) + @test result2 isa O3AResult + @test isfinite(result2.score) || result2.score == -1.0 # May fail and return -1.0 + @test isfinite(result2.rmsd) || result2.rmsd == Inf # May fail and return Inf + end + + @testset "get_crippen_o3a function" begin + # Test Crippen-based O3A alignment with benzene derivatives + mol1 = create_mol_with_3d("c1ccccc1C") # toluene + mol2 = create_mol_with_3d("c1ccc(C)cc1O") # 4-methylphenol (cresol) + + result = get_crippen_o3a(mol1, mol2) + @test result isa O3AResult + @test result.score isa Float64 + @test result.rmsd isa Float64 + @test result.rmsd >= 0.0 || result.rmsd == Inf # May fail + @test size(result.transform) == (4, 4) + @test result.matched_atoms isa Vector{Tuple{Int, Int}} + @test all(isfinite, result.transform) || + result.transform == Matrix{Float64}(I, 4, 4) + + # Test with different parameters + result2 = get_crippen_o3a(mol1, mol2; attempt_generic_features = false) + @test result2 isa O3AResult + @test isfinite(result2.score) || result2.score == -1.0 + @test isfinite(result2.rmsd) || result2.rmsd == Inf + end + + @testset "o3a_align! function" begin + # Test convenience function with similar aromatic compounds + mol1 = create_mol_with_3d("c1ccc(CC)cc1") # ethylbenzene + mol2 = create_mol_with_3d("c1ccc(CCC)cc1") # propylbenzene + + # MMFF-based alignment + result1 = o3a_align!(mol1, mol2, :mmff) + @test result1 isa O3AResult + @test isfinite(result1.score) || result1.score == -1.0 + @test isfinite(result1.rmsd) || result1.rmsd == Inf + + # Reset molecules for Crippen test + mol3 = create_mol_with_3d("c1ccc(CC)cc1") # ethylbenzene + mol4 = create_mol_with_3d("c1ccc(CCC)cc1") # propylbenzene + + # Crippen-based alignment + result2 = o3a_align!(mol3, mol4, :crippen) + @test result2 isa O3AResult + @test isfinite(result2.score) || result2.score == -1.0 + @test isfinite(result2.rmsd) || result2.rmsd == Inf + + # Test invalid method + @test_throws ArgumentError o3a_align!(mol1, mol2, :invalid) + end + end + + @testset "Error Handling and Edge Cases" begin + @testset "Invalid molecules" begin + valid_mol = create_mol_with_3d("CCO") + + # Create an invalid molecule properly + invalid_mol = mol_from_smiles("INVALID_SMILES") + @test !invalid_mol.valid + + # Test functions with invalid molecules + @test_throws ArgumentError align_mol(invalid_mol, valid_mol) + @test_throws ArgumentError align_mol(valid_mol, invalid_mol) + @test_throws ArgumentError calc_rms(invalid_mol, valid_mol) + @test_throws ArgumentError calc_rms(valid_mol, invalid_mol) + @test_throws ArgumentError get_best_rms(invalid_mol, valid_mol) + @test_throws ArgumentError get_alignment_transform(invalid_mol, valid_mol) + @test_throws ArgumentError random_transform(invalid_mol) + @test_throws ArgumentError apply_transform( + invalid_mol, Matrix{Float64}(I, 4, 4) + ) + @test_throws ArgumentError get_o3a(invalid_mol, valid_mol) + @test_throws ArgumentError get_crippen_o3a(invalid_mol, valid_mol) + end + + @testset "Invalid transformation matrix" begin + mol = create_mol_with_3d("CCO") + + # Test with wrong size matrix + wrong_size_matrix = Matrix{Float64}(I, 3, 3) + @test_throws ArgumentError apply_transform(mol, wrong_size_matrix) + + # Test with non-square matrix + non_square_matrix = rand(4, 3) + @test_throws ArgumentError apply_transform(mol, non_square_matrix) + end + + @testset "Molecules without 3D conformers" begin + # Test behavior with molecules that have no 3D conformers + mol1_2d = mol_from_smiles("CCO") # Only 2D structure + mol2_2d = mol_from_smiles("CCC") # Only 2D structure + + # These should handle gracefully and typically return Inf + rmsd = align_mol(mol1_2d, mol2_2d) + @test rmsd isa Float64 + @test rmsd == Inf || rmsd >= 0.0 # Should be Inf or positive + + rms = calc_rms(mol1_2d, mol2_2d) + @test rms isa Float64 + @test rms == Inf || rms >= 0.0 # Should be Inf or positive + + # Test that transformation still returns identity on failure + transform = get_alignment_transform(mol1_2d, mol2_2d) + @test transform isa Matrix{Float64} + @test size(transform) == (4, 4) + # May be identity matrix on failure + end + + @testset "Identical molecules" begin + # Test with identical molecules + mol1_base = mol_from_smiles("CCO") + mol2_base = mol_from_smiles("CCO") + @test mol1_base.valid && mol2_base.valid + conformers1 = generate_3d_conformers(mol1_base, 1; random_seed = 42) + conformers2 = generate_3d_conformers(mol2_base, 1; random_seed = 42) # Same seed for identical conformers + @test !isempty(conformers1) && !isempty(conformers2) + + mol1 = conformers1[1].molecule + mol2 = conformers2[1].molecule + + rmsd = align_mol(mol1, mol2) + @test rmsd isa Float64 + @test rmsd >= 0.0 # Should be very small but may not be exactly 0 + + rms = calc_rms(mol1, mol2) + @test rms isa Float64 + @test rms >= 0.0 + end + + @testset "Different sized molecules" begin + # Test alignment between molecules of different sizes + small_mol = create_mol_with_3d("CC") # ethane + large_mol = create_mol_with_3d("CCCCCCCCCC") # decane + + # Should handle gracefully + rmsd = align_mol(small_mol, large_mol) + @test rmsd isa Float64 + + rms = calc_rms(small_mol, large_mol) + @test rms isa Float64 + end + end + + @testset "Parameter Validation" begin + @testset "Conformer ID validation" begin + mol1 = create_mol_with_3d("CCO") + mol2 = create_mol_with_3d("CCC") + + # Test with valid conformer IDs (0-based in RDKit) + rmsd1 = align_mol(mol1, mol2; probe_conf_id = 0, ref_conf_id = 0) + @test rmsd1 isa Float64 + + # Test with -1 (default) + rmsd2 = align_mol(mol1, mol2; probe_conf_id = -1, ref_conf_id = -1) + @test rmsd2 isa Float64 + + # Test with invalid conformer IDs (should handle gracefully) + rmsd3 = align_mol(mol1, mol2; probe_conf_id = 999, ref_conf_id = 999) + @test rmsd3 isa Float64 # May be Inf but should not crash + end + + @testset "Max iterations validation" begin + mol1 = create_mol_with_3d("CCO") + mol2 = create_mol_with_3d("CCO") + + # Test with different max_iterations values + rmsd1 = align_mol(mol1, mol2; max_iterations = 10) + @test rmsd1 isa Float64 + @test rmsd1 >= 0.0 + + rmsd2 = align_mol(mol1, mol2; max_iterations = 100) + @test rmsd2 isa Float64 + @test rmsd2 >= 0.0 + + # Test with minimal iterations + rmsd3 = align_mol(mol1, mol2; max_iterations = 1) + @test rmsd3 isa Float64 + @test rmsd3 >= 0.0 + end + + @testset "Weight validation" begin + mol1 = create_mol_with_3d("CCO") # ethanol: 3 atoms total (no explicit hydrogens) + mol2 = create_mol_with_3d("CCO") # ethanol: 3 atoms total (no explicit hydrogens) + + # Test align_mol with valid weights for all atoms + weights = ones(3) # Equal weights for all 3 atoms + weights[3] = 2.0 # More weight on oxygen (3rd atom) + rmsd1 = align_mol(mol1, mol2; weights = weights) + @test rmsd1 isa Float64 + + # Test calc_rms with valid weights + rms1 = calc_rms(mol1, mol2; weights = weights) + @test rms1 isa Float64 + + # Test get_best_rms with valid weights + best_rms1 = get_best_rms(mol1, mol2; weights = weights) + @test best_rms1 isa Float64 + + # Test get_alignment_transform with valid weights + transform1 = get_alignment_transform(mol1, mol2; weights = weights) + @test transform1 isa Matrix{Float64} + @test size(transform1) == (4, 4) + + # Test with empty weights (should work) + empty_weights = Float64[] + rmsd2 = align_mol(mol1, mol2; weights = empty_weights) + @test rmsd2 isa Float64 + + rms2 = calc_rms(mol1, mol2; weights = empty_weights) + @test rms2 isa Float64 + + best_rms2 = get_best_rms(mol1, mol2; weights = empty_weights) + @test best_rms2 isa Float64 + end + end +end diff --git a/test/test_all_descriptors.jl b/test/test_all_descriptors.jl index eb8f467..a990683 100644 --- a/test/test_all_descriptors.jl +++ b/test/test_all_descriptors.jl @@ -4,6 +4,7 @@ using MoleculeFlow @testset "All Descriptors" begin @testset "calc_all_descriptors" begin mol = mol_from_smiles("CCO") + @test mol.valid descriptors = calc_all_descriptors(mol) @test isa(descriptors, Dict{Symbol, Any}) diff --git a/test/test_atom_properties.jl b/test/test_atom_properties.jl index 1769ca1..62795e7 100644 --- a/test/test_atom_properties.jl +++ b/test/test_atom_properties.jl @@ -4,6 +4,7 @@ using MoleculeFlow @testset "Extended Atom Properties" begin @testset "Basic Extended Properties" begin mol = mol_from_smiles("CCO") + @test mol.valid atom_o = get_atom(mol, 3) # Oxygen # Test radical electrons @@ -24,6 +25,7 @@ using MoleculeFlow @testset "Chirality Properties" begin # Test with a non-chiral molecule mol = mol_from_smiles("CCO") + @test mol.valid atom = get_atom(mol, 1) @test !is_chiral_center(atom) @test get_cip_code(atom) === missing @@ -39,12 +41,14 @@ using MoleculeFlow @testset "Ring Properties" begin # Test with benzene mol = mol_from_smiles("c1ccccc1") + @test mol.valid atom = get_atom(mol, 1) @test is_in_ring(atom) @test get_ring_size(atom) == 6 # Test with non-ring atom mol2 = mol_from_smiles("CCCC") + @test mol2.valid atom2 = get_atom(mol2, 1) @test !is_in_ring(atom2) @test get_ring_size(atom2) === missing @@ -52,6 +56,7 @@ using MoleculeFlow @testset "Contribution Properties" begin mol = mol_from_smiles("CCO") + @test mol.valid logp_contrib = get_crippen_log_p_contribution(mol, 1) @test isa(logp_contrib, Union{Float64, Missing}) @@ -71,6 +76,7 @@ using MoleculeFlow @testset "All Atom Properties" begin mol = mol_from_smiles("CCO") + @test mol.valid compute_gasteiger_charges!(mol) carbon_props = get_all_atom_properties(mol, 1) @@ -121,6 +127,7 @@ using MoleculeFlow @testset "Hydrogen Bonding Logic" begin mol = mol_from_smiles("CCN") + @test mol.valid n_atom = get_atom(mol, 3) @test is_hydrogen_donor(n_atom) # NH2 can donate @test is_hydrogen_acceptor(n_atom) # N can accept @@ -147,6 +154,7 @@ using MoleculeFlow @testset "Edge Cases" begin mol = mol_from_smiles("c1ccccc1") + @test mol.valid for i in 1:6 atom = get_atom(mol, i) @test is_aromatic(atom) diff --git a/test/test_atoms.jl b/test/test_atoms.jl index 992af70..91f6538 100644 --- a/test/test_atoms.jl +++ b/test/test_atoms.jl @@ -3,6 +3,7 @@ using MoleculeFlow @testset "Atom Operations" begin mol = mol_from_smiles("CCO") + @test mol.valid # Test getting atoms atoms = get_atoms(mol) @@ -23,4 +24,10 @@ using MoleculeFlow oxygen = get_atom(mol, 3) @test get_symbol(oxygen) == "O" @test get_atomic_number(oxygen) == 8 + + # Test with invalid molecule + invalid_mol = mol_from_smiles("invalid_smiles") + @test !invalid_mol.valid + @test get_atoms(invalid_mol) === missing + @test get_atom(invalid_mol, 1) === missing end diff --git a/test/test_bonds.jl b/test/test_bonds.jl index 1d2f80a..a76d35c 100644 --- a/test/test_bonds.jl +++ b/test/test_bonds.jl @@ -3,6 +3,7 @@ using MoleculeFlow @testset "Bond Operations" begin mol = mol_from_smiles("CCO") + @test mol.valid # Test getting bonds from atom bonds = get_bonds_from_atom(mol, 1) # First carbon @@ -16,4 +17,9 @@ using MoleculeFlow @test isa(get_bond_type(bond), String) @test isa(is_aromatic(bond), Bool) end + + # Test with invalid molecule + invalid_mol = mol_from_smiles("invalid_smiles") + @test !invalid_mol.valid + @test get_bonds_from_atom(invalid_mol, 1) === missing end diff --git a/test/test_descriptors.jl b/test/test_descriptors.jl index dfd7bc0..667ea02 100644 --- a/test/test_descriptors.jl +++ b/test/test_descriptors.jl @@ -3,6 +3,7 @@ using MoleculeFlow @testset "Molecular Descriptors" begin mol = mol_from_smiles("CC(C)O") + @test mol.valid @test molecular_weight(mol) ≈ 60.096 atol = 0.01 @test heavy_atom_count(mol) == 4 @@ -24,6 +25,8 @@ end @testset "Chi Connectivity Indices" begin ethanol = mol_from_smiles("CCO") benzene = mol_from_smiles("c1ccccc1") + @test ethanol.valid + @test benzene.valid # Test Chi0n series @test isa(chi0n(ethanol), Float64) @@ -54,6 +57,8 @@ end @testset "Kappa Shape Descriptors" begin ethanol = mol_from_smiles("CCO") benzene = mol_from_smiles("c1ccccc1") + @test ethanol.valid + @test benzene.valid @test isa(kappa2(ethanol), Float64) @test isa(kappa3(ethanol), Float64) @@ -68,6 +73,7 @@ end @testset "E-State Descriptors" begin ethanol = mol_from_smiles("CCO") + @test ethanol.valid max_estate = max_e_state_index(ethanol) min_estate = min_e_state_index(ethanol) @@ -87,6 +93,10 @@ end chloroform = mol_from_smiles("CCl") # CH3Cl pyridine = mol_from_smiles("c1cccnc1") # C5H5N dmso = mol_from_smiles("CS(=O)C") # C2H6OS + @test ethanol.valid + @test chloroform.valid + @test pyridine.valid + @test dmso.valid # Test carbon counts @test num_carbons(ethanol) == 2 @@ -119,6 +129,8 @@ end @testset "Information Content (IPC)" begin ethanol = mol_from_smiles("CCO") benzene = mol_from_smiles("c1ccccc1") + @test ethanol.valid + @test benzene.valid @test isa(ipc(ethanol), Float64) @test isa(ipc(benzene), Float64) diff --git a/test/test_error_handling.jl b/test/test_error_handling.jl index 9dbd5f5..79e2441 100644 --- a/test/test_error_handling.jl +++ b/test/test_error_handling.jl @@ -3,11 +3,13 @@ using MoleculeFlow @testset "Error Handling" begin bad_mol = mol_from_smiles("invalid") + @test !bad_mol.valid @test ismissing(molecular_weight(bad_mol)) @test ismissing(morgan_fingerprint(bad_mol)) @test ismissing(get_atoms(bad_mol)) good_mol = mol_from_smiles("CCO") + @test good_mol.valid @test ismissing(tanimoto_similarity(good_mol, bad_mol)) @test ismissing(tanimoto_similarity(bad_mol, good_mol)) end diff --git a/test/test_fingerprints.jl b/test/test_fingerprints.jl index 15527de..4553480 100644 --- a/test/test_fingerprints.jl +++ b/test/test_fingerprints.jl @@ -3,6 +3,7 @@ using MoleculeFlow @testset "Fingerprints" begin mol = mol_from_smiles("CCO") + @test mol.valid morgan_fp = morgan_fingerprint(mol) @test isa(morgan_fp, Vector{Bool}) @@ -18,4 +19,11 @@ using MoleculeFlow morgan_fp_256 = morgan_fingerprint(mol; nbits = 256) @test length(morgan_fp_256) == 256 + + # Test with invalid molecule + invalid_mol = mol_from_smiles("invalid_smiles") + @test !invalid_mol.valid + @test morgan_fingerprint(invalid_mol) === missing + @test rdk_fingerprint(invalid_mol) === missing + @test maccs_fingerprint(invalid_mol) === missing end diff --git a/test/test_fragmentation.jl b/test/test_fragmentation.jl index 9df3d76..a3e37a9 100644 --- a/test/test_fragmentation.jl +++ b/test/test_fragmentation.jl @@ -4,6 +4,7 @@ using MoleculeFlow @testset "Molecular Fragmentation" begin @testset "BRICS Decomposition" begin mol = mol_from_smiles("CCCOCCc1ccccc1") + @test mol.valid fragments = brics_decompose(mol) @test isa(fragments, Vector{String}) @@ -20,6 +21,7 @@ using MoleculeFlow @testset "RECAP Decomposition" begin mol = mol_from_smiles("CC(=O)NCCc1ccccc1") + @test mol.valid fragments = recap_decompose(mol) @test isa(fragments, Vector{String}) @@ -33,6 +35,7 @@ using MoleculeFlow @testset "Murcko Scaffolds" begin mol = mol_from_smiles("CCCOCCc1ccccc1") + @test mol.valid scaffold = get_murcko_scaffold(mol) @test isa(scaffold, String) @@ -53,6 +56,7 @@ using MoleculeFlow @testset "Fragment Counting and Splitting" begin mol = mol_from_smiles("CCO.CCC") + @test mol.valid count = get_fragment_count(mol) @test count == 2 @@ -88,6 +92,7 @@ using MoleculeFlow @testset "Bond-based Fragmentation" begin mol = mol_from_smiles("CCCC") + @test mol.valid # Fragment at bond index 1 (second bond, 0-based indexing) fragments = fragment_by_bonds(mol, [1]) diff --git a/test/test_reaction.jl b/test/test_reaction.jl index 82a03ea..7f693c9 100644 --- a/test/test_reaction.jl +++ b/test/test_reaction.jl @@ -5,6 +5,7 @@ using Test # Example: ester hydrolysis rxn_smarts = "[C:1](=O)[O:2][C:3]>>[C:1](=O)[O-].[C:3][O+]" rxn = reaction_from_smarts(rxn_smarts) + @test rxn.valid @test isa(rxn, Reaction) @test rxn.props[:SMARTS] == rxn_smarts @test reaction_to_smarts(rxn) isa String @@ -42,6 +43,7 @@ end rxn = reaction_from_smarts(rxn_smarts) # Ethyl acetate: CC(=O)OCC reactant = mol_from_smiles("CC(=O)OCC") + @test reactant.valid products = run_reaction(rxn, [reactant]) @test isa(products, Vector) @test length(products) > 0 diff --git a/test/test_similarity.jl b/test/test_similarity.jl index b062ab2..8dda540 100644 --- a/test/test_similarity.jl +++ b/test/test_similarity.jl @@ -6,6 +6,10 @@ using MoleculeFlow mol2 = mol_from_smiles("CCC") # propane mol3 = mol_from_smiles("CCO") # ethanol again + @test mol1.valid + @test mol2.valid + @test mol3.valid + # Test Tanimoto similarity sim_different = tanimoto_similarity(mol1, mol2) sim_identical = tanimoto_similarity(mol1, mol3) @@ -25,4 +29,12 @@ using MoleculeFlow @test length(similarities) == 3 @test similarities[1] ≈ 1.0 atol = 0.01 # identical to self @test similarities[3] ≈ 1.0 atol = 0.01 # identical to mol3 + + # Test with invalid molecule + invalid_mol = mol_from_smiles("invalid_smiles") + @test !invalid_mol.valid + @test tanimoto_similarity(mol1, invalid_mol) === missing + @test dice_similarity(mol1, invalid_mol) === missing + @test cosine_similarity(mol1, invalid_mol) === missing + @test bulk_similarity(invalid_mol, [mol1, mol2]) === missing end diff --git a/test/test_standardization.jl b/test/test_standardization.jl index 8b1e407..38411e8 100644 --- a/test/test_standardization.jl +++ b/test/test_standardization.jl @@ -49,6 +49,7 @@ using MoleculeFlow # Test with a molecule that has no reasonable tautomers methane = mol_from_smiles("C") + @test methane.valid tautomers_methane = enumerate_tautomers(methane) @test length(tautomers_methane) >= 1 # Should at least return the original @@ -72,6 +73,7 @@ using MoleculeFlow # Test with simple molecule (should return same) ethanol = mol_from_smiles("CCO") + @test ethanol.valid canonical_ethanol = canonical_tautomer(ethanol) @test canonical_ethanol.valid == true @test mol_to_smiles(canonical_ethanol) == mol_to_smiles(ethanol) diff --git a/test/test_substructure.jl b/test/test_substructure.jl index 98e9b2c..8af2cd5 100644 --- a/test/test_substructure.jl +++ b/test/test_substructure.jl @@ -101,20 +101,14 @@ end end @testset "Phosphorus-Containing Groups" begin - # Test phosphorus groups (if molecules can be created) - try - phosphine = mol_from_smiles("CP(C)C") - @test has_functional_group(phosphine, :phosphine) - catch - @test_skip "Phosphine test skipped - molecule creation failed" - end - - try - phosphine_oxide = mol_from_smiles("CP(=O)(C)C") - @test has_functional_group(phosphine_oxide, :phosphine_oxide) - catch - @test_skip "Phosphine oxide test skipped - molecule creation failed" - end + # Test phosphorus groups + phosphine = mol_from_smiles("CP(C)C") + @test phosphine.valid + @test has_functional_group(phosphine, :phosphine) + + phosphine_oxide = mol_from_smiles("CP(=O)(C)C") + @test phosphine_oxide.valid + @test has_functional_group(phosphine_oxide, :phosphine_oxide) end @testset "Halogen-Containing Groups" begin @@ -142,12 +136,9 @@ end @test has_functional_group(nitro, :nitro) # Azide - try - azide = mol_from_smiles("CC[N-][N+]#N") - @test has_functional_group(azide, :azide) - catch - @test_skip "Azide test skipped - molecule creation failed" - end + azide = mol_from_smiles("CC[N-][N+]#N") + @test azide.valid + @test has_functional_group(azide, :azide) # Imine imine = mol_from_smiles("CC=N") @@ -325,12 +316,9 @@ end @test aspirin_groups[:benzene] # Simple antibiotic-like β-lactam - try - beta_lactam = mol_from_smiles("C1CN(C1=O)C") - @test has_functional_group(beta_lactam, :beta_lactam) - catch - @test_skip "β-lactam test skipped - molecule creation failed" - end + beta_lactam = mol_from_smiles("C1CN(C1=O)C") + @test beta_lactam.valid + @test has_functional_group(beta_lactam, :beta_lactam) end @testset "FUNCTIONAL_GROUPS Dictionary" begin