Merge pull request #18 from Degiacomi-Lab/feature_formatting
added formatting to verbose and logfile, fixed multiprocessing problem and GPU usage in analysis_example
degiacom authored May 31, 2024
2 parents 23a9fe5 + 51b97ef commit 10a5711
Showing 4 changed files with 310 additions and 212 deletions.
5 changes: 5 additions & 0 deletions README.md
@@ -63,6 +63,11 @@ Manual installation requires the following three steps:
#### Using molearn without installation ####

Molearn can be used without installation by making sure the requirements above are met, and adding the `src` directory to your path at the beginning of every script, e.g.:
Installation using conda, while creating a new environment `molearn_env`:
```
conda env create --file environment.yml -n molearn_env
```
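The new environment can then be activated before running any molearn script (a minimal usage note; `molearn_env` is the environment name chosen above):
```
conda activate molearn_env
```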

```
import sys
sys.path.insert(0, 'path/to/molearn/src')
1 change: 1 addition & 0 deletions environment.yml
@@ -19,5 +19,6 @@ dependencies:
- ipywidgets
- plotly
- nglview
- openmmtorchplugin
- pip:
- geomloss
144 changes: 78 additions & 66 deletions examples/analysis_example.py
@@ -6,69 +6,81 @@
import matplotlib.pyplot as plt


print("> Loading network parameters...")

fname = f'xbb_foldingnet_checkpoints{os.sep}checkpoint_no_optimizer_state_dict_epoch167_loss0.003259085263643.ckpt'
# change 'cpu' to 'cuda' if you have a suitable cuda enabled device
checkpoint = torch.load(fname, map_location=torch.device('cpu'))
net = AutoEncoder(**checkpoint['network_kwargs'])
net.load_state_dict(checkpoint['model_state_dict'])

print("> Loading training data...")

MA = MolearnAnalysis()
MA.set_network(net)

# increasing the batch size makes encoding/decoding operations faster,
# but more memory demanding
MA.batch_size = 4

# increasing processes makes DOPE and Ramachandran scores calculations faster,
# but more memory demanding
MA.processes = 2

# what follows is a method to re-create the training and test set:
# by defining the manual seed and loading the dataset in the same order as when
# the neural network was trained, the same train-test split will be obtained
data = PDBData()
data.import_pdb(f'data{os.sep}MurD_closed_selection.pdb')
data.import_pdb(f'data{os.sep}MurD_open_selection.pdb')
data.fix_terminal()
data.atomselect(atoms = ['CA', 'C', 'N', 'CB', 'O'])
data.prepare_dataset()
data_train, data_test = data.split(manual_seed=25)

# store the training and test set in the MolearnAnalysis instance
# the second parameter of the following commands can be either a PDBData instance
# or a path to a multi-PDB file
MA.set_dataset("training", data_train)
MA.set_dataset("test", data_test)

print("> calculating RMSD of training and test set")

err_train = MA.get_error('training')
err_test = MA.get_error('test')

print(f'Mean RMSD is {err_train.mean()} for training set and {err_test.mean()} for test set')
fig, ax = plt.subplots()
violin = ax.violinplot([err_train, err_test], showmeans = True, )
ax.set_xticks([1,2])
ax.set_title('RMSD of training and test set')
ax.set_xticklabels(['Training', 'Test'])
plt.savefig('RMSD_plot.png')


print("> generating error landscape")
# build a 50x50 grid. By default, it will be 10% larger than the region occupied
# by all loaded datasets
MA.setup_grid(50)
landscape_err_latent, landscape_err_3d, xaxis, yaxis = MA.scan_error()

fig, ax = plt.subplots()
c = ax.pcolormesh(xaxis, yaxis, landscape_err_latent)
plt.savefig('Error_grid.png')


## to visualise the GUI, execute the code above in a Jupyter notebook, then call:
# from molearn.analysis import MolearnGUI
# MolearnGUI(MA)
def main():
    print("> Loading network parameters...")

    fname = f"xbb_foldingnet_checkpoints{os.sep}checkpoint_no_optimizer_state_dict_epoch167_loss0.003259085263643.ckpt"
    # use the GPU if one is available, otherwise fall back to the CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    checkpoint = torch.load(fname, map_location=device)
    net = AutoEncoder(**checkpoint["network_kwargs"])
    net.load_state_dict(checkpoint["model_state_dict"])
    if torch.cuda.is_available():
        # move the network to the GPU; loading the state dict alone does not do this
        net.to(device)

    print("> Loading training data...")

    MA = MolearnAnalysis()
    MA.set_network(net)

    # increasing the batch size makes encoding/decoding operations faster,
    # but more memory demanding
    MA.batch_size = 4

    # increasing processes makes DOPE and Ramachandran score calculations faster,
    # but more memory demanding
    MA.processes = 2

    # what follows is a method to re-create the training and test set:
    # by defining the manual seed and loading the dataset in the same order as when
    # the neural network was trained, the same train-test split will be obtained
    data = PDBData()
    data.import_pdb(f"data{os.sep}MurD_closed_selection.pdb")
    data.import_pdb(f"data{os.sep}MurD_open_selection.pdb")
    data.fix_terminal()
    data.atomselect(atoms=["CA", "C", "N", "CB", "O"])
    data.prepare_dataset()
    data_train, data_test = data.split(manual_seed=25)

    # store the training and test set in the MolearnAnalysis instance
    # the second parameter of the following commands can be either a PDBData instance
    # or a path to a multi-PDB file
    MA.set_dataset("training", data_train)
    MA.set_dataset("test", data_test)

    print("> calculating RMSD of training and test set")

    err_train = MA.get_error("training")
    err_test = MA.get_error("test")

    print(
        f"Mean RMSD is {err_train.mean()} for training set and {err_test.mean()} for test set"
    )
    fig, ax = plt.subplots()
    _ = ax.violinplot(
        [err_train, err_test],
        showmeans=True,
    )
    ax.set_xticks([1, 2])
    ax.set_title("RMSD of training and test set")
    ax.set_xticklabels(["Training", "Test"])
    plt.savefig("RMSD_plot.png")

    print("> generating error landscape")
    # build a 50x50 grid. By default, it will be 10% larger than the region occupied
    # by all loaded datasets
    MA.setup_grid(50)
    landscape_err_latent, landscape_err_3d, xaxis, yaxis = MA.scan_error()

    fig, ax = plt.subplots()
    _ = ax.pcolormesh(xaxis, yaxis, landscape_err_latent)
    plt.savefig("Error_grid.png")

    ## to visualise the GUI, execute the code above in a Jupyter notebook, then call:
    # from molearn.analysis import MolearnGUI
    # MolearnGUI(MA)


if __name__ == "__main__":
    main()
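The GPU handling introduced in `main()` above can be sanity-checked with plain PyTorch. The following is a minimal sketch, assuming it runs after the checkpoint has been loaded and `net` is the `AutoEncoder` instance created in `main()`:
```
import torch

# report which device the network parameters ended up on after net.to(device);
# expected to be e.g. cuda:0 when a GPU is available, cpu otherwise
print(f"network parameters are on: {next(net.parameters()).device}")
```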