added formatting to verbose and logfile, fixed multiprocessing problem and GPU usage in analysis_example #18

Merged
1 commit merged on May 31, 2024
5 changes: 5 additions & 0 deletions README.md
@@ -63,6 +63,11 @@ Manual installation requires the following three steps:
#### Using molearn without installation ####

Molearn can be used without installation by making sure the requirements above are met, and adding the `src` directory to your path at the beginning of every script (see the snippet below). The requirements can be installed with conda by creating a new environment `molearn_env`:
```
conda env create --file environment.yml -n molearn_env
```
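
Once created, the environment can be activated in the usual way (assuming a standard conda installation):

```
conda activate molearn_env
```

With the environment active, add the `src` directory to your path at the top of each script: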

```
import sys
sys.path.insert(0, 'path/to/molearn/src')
```
1 change: 1 addition & 0 deletions environment.yml
@@ -18,5 +18,6 @@ dependencies:
- ipywidgets
- plotly
- nglview
- openmmtorchplugin
- pip:
- geomloss
144 changes: 78 additions & 66 deletions examples/analysis_example.py
@@ -6,69 +6,81 @@
import matplotlib.pyplot as plt


print("> Loading network parameters...")

fname = f'xbb_foldingnet_checkpoints{os.sep}checkpoint_no_optimizer_state_dict_epoch167_loss0.003259085263643.ckpt'
# change 'cpu' to 'cuda' if you have a suitable CUDA-enabled device
checkpoint = torch.load(fname, map_location=torch.device('cpu'))
net = AutoEncoder(**checkpoint['network_kwargs'])
net.load_state_dict(checkpoint['model_state_dict'])

print("> Loading training data...")

MA = MolearnAnalysis()
MA.set_network(net)

# increasing the batch size makes encoding/decoding operations faster,
# but more memory demanding
MA.batch_size = 4

# increasing processes makes DOPE and Ramachandran score calculations faster,
# but more memory demanding
MA.processes = 2

# what follows is a method to re-create the training and test set
# by defining the manual seed and loading the dataset in the same order as when
# the neural network was trained, the same train-test split will be obtained
data = PDBData()
data.import_pdb(f'data{os.sep}MurD_closed_selection.pdb')
data.import_pdb(f'data{os.sep}MurD_open_selection.pdb')
data.fix_terminal()
data.atomselect(atoms = ['CA', 'C', 'N', 'CB', 'O'])
data.prepare_dataset()
data_train, data_test = data.split(manual_seed=25)

# store the training and test set in the MolearnAnalysis instance
# the second parameter of the following commands can be either a PDBData instance
# or a path to a multi-PDB file
MA.set_dataset("training", data_train)
MA.set_dataset("test", data_test)

print("> calculating RMSD of training and test set")

err_train = MA.get_error('training')
err_test = MA.get_error('test')

print(f'Mean RMSD is {err_train.mean()} for training set and {err_test.mean()} for test set')
fig, ax = plt.subplots()
violin = ax.violinplot([err_train, err_test], showmeans = True, )
ax.set_xticks([1,2])
ax.set_title('RMSD of training and test set')
ax.set_xticklabels(['Training', 'Test'])
plt.savefig('RMSD_plot.png')


print("> generating error landscape")
# build a 50x50 grid. By default, it will be 10% larger than the region occupied
# by all loaded datasets
MA.setup_grid(50)
landscape_err_latent, landscape_err_3d, xaxis, yaxis = MA.scan_error()

fig, ax = plt.subplots()
c = ax.pcolormesh(xaxis, yaxis, landscape_err_latent)
plt.savefig('Error_grid.png')


## to visualise the GUI, execute the code above in a Jupyter notebook, then call:
# from molearn.analysis import MolearnGUI
# MolearnGUI(MA)
def main():
    print("> Loading network parameters...")

    fname = f"xbb_foldingnet_checkpoints{os.sep}checkpoint_no_optimizer_state_dict_epoch167_loss0.003259085263643.ckpt"
    # use the GPU if available, otherwise fall back to the CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    checkpoint = torch.load(fname, map_location=device)
    net = AutoEncoder(**checkpoint["network_kwargs"])
    net.load_state_dict(checkpoint["model_state_dict"])
    if torch.cuda.is_available():
        # move the network to the GPU; loading the state dict alone leaves it on the CPU
        net.to(device)

    print("> Loading training data...")

    MA = MolearnAnalysis()
    MA.set_network(net)

    # increasing the batch size makes encoding/decoding operations faster,
    # but more memory demanding
    MA.batch_size = 4

    # increasing processes makes DOPE and Ramachandran score calculations faster,
    # but more memory demanding
    MA.processes = 2

    # what follows is a method to re-create the training and test set
    # by defining the manual seed and loading the dataset in the same order as when
    # the neural network was trained, the same train-test split will be obtained
    data = PDBData()
    data.import_pdb(f"data{os.sep}MurD_closed_selection.pdb")
    data.import_pdb(f"data{os.sep}MurD_open_selection.pdb")
    data.fix_terminal()
    data.atomselect(atoms=["CA", "C", "N", "CB", "O"])
    data.prepare_dataset()
    data_train, data_test = data.split(manual_seed=25)

    # store the training and test set in the MolearnAnalysis instance
    # the second parameter of the following commands can be either a PDBData instance
    # or a path to a multi-PDB file
    MA.set_dataset("training", data_train)
    MA.set_dataset("test", data_test)

    print("> calculating RMSD of training and test set")

    err_train = MA.get_error("training")
    err_test = MA.get_error("test")

    print(
        f"Mean RMSD is {err_train.mean()} for training set and {err_test.mean()} for test set"
    )
    fig, ax = plt.subplots()
    _ = ax.violinplot(
        [err_train, err_test],
        showmeans=True,
    )
    ax.set_xticks([1, 2])
    ax.set_title("RMSD of training and test set")
    ax.set_xticklabels(["Training", "Test"])
    plt.savefig("RMSD_plot.png")

    print("> generating error landscape")
    # build a 50x50 grid. By default, it will be 10% larger than the region occupied
    # by all loaded datasets
    MA.setup_grid(50)
    landscape_err_latent, landscape_err_3d, xaxis, yaxis = MA.scan_error()

    fig, ax = plt.subplots()
    _ = ax.pcolormesh(xaxis, yaxis, landscape_err_latent)
    plt.savefig("Error_grid.png")

    ## to visualise the GUI, execute the code above in a Jupyter notebook, then call:
    # from molearn.analysis import MolearnGUI
    # MolearnGUI(MA)


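# note: running the analysis from main() behind the guard below, rather than at module
# level, is presumably the multiprocessing fix this PR refers to: the DOPE and Ramachandran
# score calculations use worker processes, and start methods that re-import the main module
# (e.g. "spawn") would otherwise re-execute the whole script in every worker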
if __name__ == "__main__":
    main()