added formatting to verbose and logfile, fixed multiprocessing problem and GPU usage in analysis_example
gwirn committed May 31, 2024
1 parent 4e6b640 commit 51b97ef
Showing 4 changed files with 310 additions and 212 deletions.
5 changes: 5 additions & 0 deletions README.md
@@ -63,6 +63,11 @@ Manual installation requires the following three steps:
#### Using molearn without installation ####

Molearn can be used without installation by making sure the requirements above are met, and adding the `src` directory to your path at the beginning of every script, e.g.:
Installation using conda while creating a new environment `molearn_env`:
```
conda env create --file environment.yml -n molearn_env
```
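After creating the environment, it can be activated with `conda activate molearn_env` before running the examples.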

```
import sys
sys.path.insert(0, 'path/to/molearn/src')
1 change: 1 addition & 0 deletions environment.yml
@@ -18,5 +18,6 @@ dependencies:
- ipywidgets
- plotly
- nglview
- openmmtorchplugin
- pip:
  - geomloss
144 changes: 78 additions & 66 deletions examples/analysis_example.py
@@ -6,69 +6,81 @@
import matplotlib.pyplot as plt


print("> Loading network parameters...")

fname = f'xbb_foldingnet_checkpoints{os.sep}checkpoint_no_optimizer_state_dict_epoch167_loss0.003259085263643.ckpt'
# change 'cpu' to 'cuda' if you have a suitable cuda enabled device
checkpoint = torch.load(fname, map_location=torch.device('cpu'))
net = AutoEncoder(**checkpoint['network_kwargs'])
net.load_state_dict(checkpoint['model_state_dict'])

print("> Loading training data...")

MA = MolearnAnalysis()
MA.set_network(net)

# increasing the batch size makes encoding/decoding operations faster,
# but more memory demanding
MA.batch_size = 4

# increasing processes makes DOPE and Ramachandran score calculations faster,
# but more memory demanding
MA.processes = 2

# what follows is a method to re-create the training and test set
# by defining the manual seed and loading the dataset in the same order as when
# the neural network was trained, the same train-test split will be obtained
data = PDBData()
data.import_pdb(f'data{os.sep}MurD_closed_selection.pdb')
data.import_pdb(f'data{os.sep}MurD_open_selection.pdb')
data.fix_terminal()
data.atomselect(atoms = ['CA', 'C', 'N', 'CB', 'O'])
data.prepare_dataset()
data_train, data_test = data.split(manual_seed=25)

# store the training and test set in the MolearnAnalysis instance
# the second parameter of the following commands can be either a PDBData instance
# or a path to a multi-PDB file
MA.set_dataset("training", data_train)
MA.set_dataset("test", data_test)

print("> calculating RMSD of training and test set")

err_train = MA.get_error('training')
err_test = MA.get_error('test')

print(f'Mean RMSD is {err_train.mean()} for training set and {err_test.mean()} for test set')
fig, ax = plt.subplots()
violin = ax.violinplot([err_train, err_test], showmeans = True, )
ax.set_xticks([1,2])
ax.set_title('RMSD of training and test set')
ax.set_xticklabels(['Training', 'Test'])
plt.savefig('RMSD_plot.png')


print("> generating error landscape")
# build a 50x50 grid. By default, it will be 10% larger than the region occupied
# by all loaded datasets
MA.setup_grid(50)
landscape_err_latent, landscape_err_3d, xaxis, yaxis = MA.scan_error()

fig, ax = plt.subplots()
c = ax.pcolormesh(xaxis, yaxis, landscape_err_latent)
plt.savefig('Error_grid.png')


## to visualise the GUI, execute the code above in a Jupyter notebook, then call:
# from molearn.analysis import MolearnGUI
# MolearnGUI(MA)
def main():
print("> Loading network parameters...")

fname = f"xbb_foldingnet_checkpoints{os.sep}checkpoint_no_optimizer_state_dict_epoch167_loss0.003259085263643.ckpt"
# if GPU is available we will use the GPU else the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
checkpoint = torch.load(fname, map_location=device)
net = AutoEncoder(**checkpoint["network_kwargs"])
net.load_state_dict(checkpoint["model_state_dict"])
if torch.cuda.is_available():
# otherwise net is still not on the GPU
net.to(device)

print("> Loading training data...")

MA = MolearnAnalysis()
MA.set_network(net)

# increasing the batch size makes encoding/decoding operations faster,
# but more memory demanding
MA.batch_size = 4

# increasing processes makes DOPE and Ramachandran scores calculations faster,
# but more more memory demanding
MA.processes = 2

# what follows is a method to re-create the training and test set
# by defining the manual see and loading the dataset in the same order as when
# the neural network was trained, the same train-test split will be obtained
data = PDBData()
data.import_pdb(f"data{os.sep}MurD_closed_selection.pdb")
data.import_pdb(f"data{os.sep}MurD_open_selection.pdb")
data.fix_terminal()
data.atomselect(atoms=["CA", "C", "N", "CB", "O"])
data.prepare_dataset()
data_train, data_test = data.split(manual_seed=25)

# store the training and test set in the MolearnAnalysis instance
# the second parameter of the sollowing commands can be both a PDBData instance
# or a path to a multi-PDB file
MA.set_dataset("training", data_train)
MA.set_dataset("test", data_test)

print("> calculating RMSD of training and test set")

err_train = MA.get_error("training")
err_test = MA.get_error("test")

print(
f"Mean RMSD is {err_train.mean()} for training set and {err_test.mean()} for test set"
)
fig, ax = plt.subplots()
_ = ax.violinplot(
[err_train, err_test],
showmeans=True,
)
ax.set_xticks([1, 2])
ax.set_title("RMSD of training and test set")
ax.set_xticklabels(["Training", "Test"])
plt.savefig("RMSD_plot.png")

print("> generating error landscape")
# build a 50x50 grid. By default, it will be 10% larger than the region occupied
# by all loaded datasets
MA.setup_grid(50)
landscape_err_latent, landscape_err_3d, xaxis, yaxis = MA.scan_error()

fig, ax = plt.subplots()
_ = ax.pcolormesh(xaxis, yaxis, landscape_err_latent)
plt.savefig("Error_grid.png")

## to visualise the GUI, execute the code above in a Jupyter notebook, then call:
# from molearn.analysis import MolearnGUI
# MolearnGUI(MA)


if __name__ == "__main__":
    main()
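The commit message mentions fixing a multiprocessing problem: wrapping the script body in `main()` and calling it only under the `if __name__ == "__main__"` guard is the standard remedy when Python spawns worker processes (the `MA.processes` setting above suggests DOPE/Ramachandran scoring uses worker processes). Below is a minimal, generic sketch of that pattern; it is not code from this repository.

```
# Minimal sketch (not part of this commit): with the "spawn" start method,
# each worker re-imports the module, so any module-level code would run again
# in every child process. Keeping the work inside main() and calling it only
# under __main__ avoids that.
import multiprocessing as mp


def square(x):
    return x * x


def main():
    with mp.Pool(processes=2) as pool:
        print(pool.map(square, range(4)))


if __name__ == "__main__":
    main()
```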
