Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Export for java #215

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions ms2deepscore/models/save_for_java.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os

import torch

from ms2deepscore import SettingsMS2Deepscore
from ms2deepscore.models import load_model
from ms2deepscore.train_new_model.train_ms2deepscore import train_ms2ds_model
from tests.create_test_spectra import pesticides_test_spectra
from tests.test_data_generators import create_test_spectra
import numpy as np


def save_model_for_java(input_model_file_name, output_model_file_name):
    """Export a stored model as a traced TorchScript file usable from Java.

    The model is loaded with ``load_model`` and traced with random example
    inputs for two spectra: two tensors sized by the model's number of bins
    and two tensors sized by the number of additional metadata entries.

    Args:
        input_model_file_name: Location of the stored model to load.
        output_model_file_name: Location the traced module is saved to.
    """
    model = load_model(input_model_file_name)
    settings = model.model_settings

    # Tracing only needs inputs of the right shape; the values are random.
    batch_size = 2
    n_bins = settings.number_of_bins()
    n_metadata = len(settings.additional_metadata)
    example_inputs = (
        torch.rand(batch_size, n_bins),
        torch.rand(batch_size, n_bins),
        torch.rand(batch_size, n_metadata),
        torch.rand(batch_size, n_metadata),
    )

    # torch.jit.trace records the operations into a torch.jit.ScriptModule.
    scripted = torch.jit.trace(model, example_inputs)
    scripted.save(output_model_file_name)


def save_model_for_java_output_embeddings(input_model_file_name, output_model_file_name):
    """Export only the model's encoder as a traced TorchScript file for Java.

    Unlike exporting the full model, the traced module is ``model.encoder``,
    which takes a single pair of tensors: one sized by the number of bins and
    one sized by the number of additional metadata entries.

    Args:
        input_model_file_name: Location of the stored model to load.
        output_model_file_name: Location the traced encoder is saved to.
    """
    model = load_model(input_model_file_name)
    settings = model.model_settings

    # Tracing only needs inputs of the right shape; the values are random.
    batch_size = 2
    example_spectra = torch.rand(batch_size, settings.number_of_bins())
    example_metadata = torch.rand(batch_size, len(settings.additional_metadata))

    # torch.jit.trace records the operations into a torch.jit.ScriptModule.
    scripted_encoder = torch.jit.trace(model.encoder, (example_spectra, example_metadata))
    scripted_encoder.save(output_model_file_name)


def create_test_model_for_java(results_folder):
    """Train a small test model and export its encoder for use from Java.

    A deliberately tiny configuration is trained on generated test spectra,
    after which the encoder is written to ``java_ms2deepscore_model.pt``
    inside ``results_folder``.

    Args:
        results_folder: Directory used for the training output and the
            exported encoder file.
    """
    training_spectra = create_test_spectra(8)

    # Small dimensions and few epochs keep the run fast; real models use more.
    precursor_mz_scaler = ("StandardScaler", {"metadata_field": "precursor_mz",
                                              "mean": 0.0,
                                              "standard_deviation": 1000.0})
    ionmode_encoder = ("CategoricalToBinary", {"metadata_field": "ionmode",
                                               "entries_becoming_one": "positive",
                                               "entries_becoming_zero": "negative"})
    settings_kwargs = {
        "mz_bin_width": 1.0,
        "epochs": 2,
        "base_dims": (100, 100),
        "embedding_dim": 50,
        "same_prob_bins": np.array([(0, 0.5), (0.5, 1.0)]),
        "average_pairs_per_bin": 2,
        "batch_size": 8,
        "additional_metadata": [precursor_mz_scaler, ionmode_encoder],
    }
    settings = SettingsMS2Deepscore(**settings_kwargs)

    validation_spectra = pesticides_test_spectra()
    train_ms2ds_model(training_spectra, validation_spectra, results_folder, settings)

    # NOTE(review): assumes training writes "ms2deepscore_model.pt" into
    # results_folder -- confirm against train_ms2ds_model.
    trained_model_file = os.path.join(results_folder, "ms2deepscore_model.pt")
    java_model_file = os.path.join(results_folder, "java_ms2deepscore_model.pt")
    save_model_for_java_output_embeddings(trained_model_file, java_model_file)
Binary file added tests/resources/ms2deepscore_model.pt
Binary file not shown.
13 changes: 4 additions & 9 deletions tests/test_ms2deepscore_evaluated.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,17 @@
from pathlib import Path
import numpy as np
import pytest
from ms2deepscore import MS2DeepScoreEvaluated
from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore, SettingsEmbeddingEvaluator
from ms2deepscore.models import load_model, LinearModel, EmbeddingEvaluationModel
from ms2deepscore.SettingsMS2Deepscore import SettingsEmbeddingEvaluator
from ms2deepscore.models import LinearModel, EmbeddingEvaluationModel
from tests.test_user_worfklow import load_processed_spectrums


TEST_RESOURCES_PATH = Path(__file__).parent / 'resources'
from tests.create_test_spectra import siamese_spectral_model


def get_test_ms2deepscore_evaluated_instance():
"""Load data and models for MS2DeepScore unit tests."""
spectrums = load_processed_spectrums()

# Load pretrained model
model_file = TEST_RESOURCES_PATH / "testmodel.pt"
model = load_model(model_file)
model = siamese_spectral_model()

embedding_evaluator = EmbeddingEvaluationModel(SettingsEmbeddingEvaluator())
score_evaluator = LinearModel(2)
Expand Down
82 changes: 82 additions & 0 deletions tests/test_ms2deepscore_model_for_java.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from math import isclose

import numpy as np
import torch
from matchms import Spectrum

from ms2deepscore import MS2DeepScore
from ms2deepscore.models import load_model


def test_MS2DeepScore_score_pair():
    """Compute a score for one spectrum pair with the *.pair* method.

    NOTE(review): the model is read from a path outside the test resources,
    and the score is only printed -- the assertions below are disabled.
    """
    model_file = "../../data/pytorch/gnps_21_08_23_min_5_at_5_percent/trained_models/both_mode_precursor_mz_ionmode_2000_2000_2000_layers_500_embedding_2024_01_31_11_51_10/ms2deepscore_model.pt"
    similarity_measure = MS2DeepScore(load_model(model_file))

    low_mz_spectrum = Spectrum(
        mz=np.array([1., 2., 3.]),
        intensities=np.array([0.1, 0.1, 0.1]),
        metadata={"precursor_mz": 100, "ionmode": "positive"},
    )
    high_mz_spectrum = Spectrum(
        mz=np.array([10., 20., 30.]),
        intensities=np.array([0.1, 0.1, 0.1]),
        metadata={"precursor_mz": 100, "ionmode": "positive"},
    )

    score = similarity_measure.pair(low_mz_spectrum, high_mz_spectrum)
    print(score)
    # assert np.allclose(score, 0.990366, atol=1e-6), "Expected different score."
    # assert isinstance(score, float), "Expected score to be float"

def create_test_tensors():
    """Build four constant float32 tensors, each holding a single row.

    The tensors were previously created as bare expression statements and
    discarded, so the function returned ``None``; they are now returned.

    Returns:
        A 4-tuple of ``torch.float32`` tensors, in this order:
        shape ``(1, 9900)`` filled with 0.1, shape ``(1, 9900)`` filled
        with 1.0, shape ``(1, 2)`` filled with 0.0 and shape ``(1, 2)``
        filled with 1.0.
    """
    bin_size = 9900
    metadata_size = 2
    spectrum_1_value = 0.1
    return (
        torch.full((1, bin_size), spectrum_1_value, dtype=torch.float32),
        torch.ones((1, bin_size), dtype=torch.float32),
        torch.zeros((1, metadata_size), dtype=torch.float32),
        torch.ones((1, metadata_size), dtype=torch.float32),
    )


def test_siamese_model_forward_pass():
    """Run the full siamese model on two spectrum pairs and check the batch size.

    The model file is resolved relative to this test file rather than the
    current working directory, so the test does not depend on where pytest
    is started from.
    """
    # Local import keeps this test self-contained.
    from pathlib import Path

    model_file = Path(__file__).parent / "resources" / "ms2deepscore_model.pt"
    model = load_model(model_file)

    # Two pairs: 990 values per spectrum and 2 metadata values each.
    spectra_left = torch.tensor([[0.1] * 990, [0.2] * 990], dtype=torch.float32)
    spectra_right = torch.tensor([[0.2] * 990, [0.1] * 990], dtype=torch.float32)
    metadata_left = torch.tensor([[0.] * 2, [1.] * 2], dtype=torch.float32)
    metadata_right = torch.tensor([[1.] * 2, [0.] * 2], dtype=torch.float32)

    similarity_score = model(spectra_left, spectra_right, metadata_left, metadata_right)
    assert similarity_score.shape[0] == 2
    print(similarity_score)


def test_siamese_model_embedding_generation_from_tensor():
    """Compare encoder output of the test model with the output in MZMine for the same model.

    The model file is resolved relative to this test file rather than the
    current working directory, so the test does not depend on where pytest
    is started from.
    """
    # Local import keeps this test self-contained.
    from pathlib import Path

    model_file = Path(__file__).parent / "resources" / "ms2deepscore_model.pt"
    model = load_model(model_file)

    # Two spectra of 990 values each, with 2 metadata values per spectrum.
    spectra = torch.tensor([[0.1] * 990, [0.2] * 990], dtype=torch.float32)
    metadata = torch.tensor([[0.] * 2, [1.] * 2], dtype=torch.float32)

    embeddings = model.encoder(spectra, metadata)
    assert embeddings.shape == (2, 50)
    # Reference values for the first embedding dimension of each spectrum.
    assert isclose(float(embeddings[0][0]), -4.6007e-02, abs_tol=0.001)
    assert isclose(float(embeddings[1][0]), -3.7386e-02, abs_tol=0.001)


def test_siamese_model_embedding_generation_from_spectrum():
    """Create embeddings and a score matrix from matchms spectra with the test model.

    The model file is resolved relative to this test file rather than the
    current working directory, so the test does not depend on where pytest
    is started from.
    """
    # Local import keeps this test self-contained.
    from pathlib import Path

    model_file = Path(__file__).parent / "resources" / "ms2deepscore_model.pt"
    ms2deepscore_model = MS2DeepScore(load_model(model_file))

    intensities = [0.2, 0.4, 0.6, 0.8, 1.0]
    test_spectra = [
        Spectrum(mz=np.array([100.1, 200.1, 300.1, 400.1, 500.1]),
                 intensities=np.array(intensities),
                 metadata={"precursor_mz": 600,
                           "ionmode": "positive"}),
        Spectrum(mz=np.array([600.1, 700.1, 800.1, 900.1, 1000.1]),
                 intensities=np.array(intensities),
                 metadata={"precursor_mz": 1000,
                           "ionmode": "positive"}),
    ]

    embeddings = ms2deepscore_model.get_embedding_array(test_spectra)
    print(embeddings)
    assert embeddings.shape == (2, 50)
    # The scores are only printed for manual comparison; nothing is asserted on them.
    scores = ms2deepscore_model.matrix(test_spectra, test_spectra)
    print(scores)

Loading