-
Notifications
You must be signed in to change notification settings - Fork 523
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
406d4c6
commit cc06daf
Showing
1 changed file
with
271 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,271 @@ | ||
{ | ||
"nbformat": 4, | ||
"nbformat_minor": 0, | ||
"metadata": { | ||
"colab": { | ||
"machine_shape": "hm", | ||
"gpuType": "A100", | ||
"provenance": [], | ||
"include_colab_link": true | ||
}, | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"name": "python" | ||
}, | ||
"accelerator": "GPU" | ||
}, | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "view-in-github", | ||
"colab_type": "text" | ||
}, | ||
"source": [ | ||
"<a href=\"https://colab.research.google.com/github/sokrypton/ColabFold/blob/main/BioEmu.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "bioemu_title" | ||
}, | ||
"source": [ | ||
"# **Biomolecular Emulator (BioEmu) in ColabFold**\n", | ||
"<img src=\"https://github.com/microsoft/bioemu/raw/main/assets/emu.png\" height=\"130\" align=\"right\" style=\"height:240px\">\n", | ||
"\n", | ||
"[BioEmu](https://github.com/microsoft/bioemu) is a framework for emulating biomolecular dynamics and integrating structural prediction tools to accelerate research in structural biology and protein engineering. This notebook uses ColabFold to generate the MSA.\n", | ||
"\n", | ||
"\n", | ||
"\n", | ||
"For more details, please read the [BioEmu Preprint](https://www.biorxiv.org/content/10.1101/2024.12.05.626885v1.abstract).\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"id": "user_input_code", | ||
"cellView": "form" | ||
}, | ||
"source": [ | ||
"#@title **Enter Your Protein Sequence & Sampling Parameters**\n", | ||
"sequence = \"GYDPETGTWG\" #@param {type:\"string\"}\n", | ||
"num_samples = 10 #@param {type:\"integer\"}\n", | ||
"output_dir = \"/content/test-chignolin\" #@param {type:\"string\"}\n", | ||
"side_chain_reconstruction = True #@param {type:\"boolean\"}\n", | ||
"\n", | ||
"print(f\"Sequence: {sequence}\\nNumber of samples: {num_samples}\\nOutput directory: {output_dir}\")" | ||
], | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"id": "install_code", | ||
"cellView": "form" | ||
}, | ||
"source": [ | ||
"#@title Install\n", | ||
"%%time\n", | ||
"import sys\n", | ||
"import os\n", | ||
"\n", | ||
"\n", | ||
"# Clone only once: `git clone` aborts when the target directory already exists,\n", | ||
"# which is what happens whenever this cell is run a second time.\n", | ||
"if not os.path.exists(\"bioemu\"):\n", | ||
"  print(\"Cloning BioEmu...\")\n", | ||
"  !git clone -q https://github.com/microsoft/bioemu.git\n", | ||
"else:\n", | ||
"  print(\"BioEmu repo already cloned.\")\n", | ||
"\n", | ||
"if not os.path.isfile(\"COLABFOLD_READY\"):\n", | ||
" print(\"installing colabfold...\")\n", | ||
" os.system(\"pip install -q --no-warn-conflicts 'colabfold[alphafold-minus-jax] @ git+https://github.com/sokrypton/ColabFold'\")\n", | ||
" if os.environ.get('TPU_NAME', False) != False:\n", | ||
" os.system(\"pip uninstall -y jax jaxlib\")\n", | ||
" os.system(\"pip install --no-warn-conflicts --upgrade dm-haiku==0.0.10 'jax[cuda12_pip]'==0.3.25 -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html\")\n", | ||
" os.system(\"ln -s /usr/local/lib/python3.*/dist-packages/colabfold colabfold\")\n", | ||
" os.system(\"ln -s /usr/local/lib/python3.*/dist-packages/alphafold alphafold\")\n", | ||
" os.system(\"touch COLABFOLD_READY\")\n", | ||
"\n", | ||
"print(\"Patching colabfold installation...\")\n", | ||
"os.system(\"patch /usr/local/lib/python3.*/dist-packages/alphafold/model/modules.py bioemu/colabfold_setup/modules.patch\")\n", | ||
"\n", | ||
"patch_text = r\"\"\"\n", | ||
"430d429\n", | ||
"<\n", | ||
"478a478\n", | ||
"> np.save(files.get(\"single_repr_evo\", \"npy\"), result[\"representations_evo\"][\"single\"])\n", | ||
"480a481\n", | ||
"> np.save(files.get(\"pair_repr_evo\", \"npy\"), result[\"representations_evo\"][\"pair\"])\n", | ||
"\"\"\"\n", | ||
"\n", | ||
"with open(\"inline_batch.patch\",\"w\") as f:\n", | ||
" f.write(patch_text)\n", | ||
"\n", | ||
"os.system(\"patch /usr/local/lib/python3.*/dist-packages/colabfold/batch.py inline_batch.patch\")\n", | ||
"\n", | ||
"# Optionally install side-chain reconstruction tools (HPacker)\n", | ||
"if side_chain_reconstruction:\n", | ||
" print(\"Enabling side-chain reconstruction tools (HPacker)...\")\n", | ||
" if not os.path.exists(\"hpacker\"):\n", | ||
" !git clone -q https://github.com/gvisani/hpacker.git\n", | ||
" %cd hpacker\n", | ||
" !pip install -q --no-warn-conflicts .\n", | ||
" %cd ..\n", | ||
" else:\n", | ||
" print(\"HPacker repo already cloned.\")\n", | ||
"else:\n", | ||
" print(\"Side-chain reconstruction not selected. Skipping HPacker installation.\")\n", | ||
"\n", | ||
"# Finally, install the BioEmu package itself.\n", | ||
"# The checkout is entered only for the pip call and left again afterwards, so the\n", | ||
"# BIOEMU_READY marker (and every relative path on a re-run) resolves against the\n", | ||
"# same directory as the COLABFOLD_READY check earlier in this cell.\n", | ||
"if not os.path.isfile(\"BIOEMU_READY\"):\n", | ||
"  %cd bioemu\n", | ||
"  !pip install -q --no-warn-conflicts \".[md]\"\n", | ||
"  %cd ..\n", | ||
"  os.system(\"touch BIOEMU_READY\")\n", | ||
"\n", | ||
"print(\"Installation complete.\")" | ||
], | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"id": "sampling_code", | ||
"cellView": "form" | ||
}, | ||
"source": [ | ||
"#@title Run BioEmu Sampling\n", | ||
"import subprocess, sys\n", | ||
"\n", | ||
"# Arguments are passed as a list (no shell involved), so the sequence and the\n", | ||
"# output path reach bioemu.sample verbatim.\n", | ||
"cmd = [\n", | ||
"    sys.executable, \"-m\", \"bioemu.sample\",\n", | ||
"    \"--sequence\", sequence,\n", | ||
"    \"--num_samples\", str(num_samples),\n", | ||
"    \"--output_dir\", output_dir\n", | ||
"]\n", | ||
"\n", | ||
"print(\"Running command:\\n\", \" \".join(cmd))\n", | ||
"# check=True raises CalledProcessError on a non-zero exit status, so a failed run\n", | ||
"# stops here instead of being reported as complete.\n", | ||
"subprocess.run(cmd, check=True)\n", | ||
"print(\"\\nSampling complete. Check the\", output_dir, \"directory for PDB and XTC files.\")" | ||
], | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"id": "viz_code", | ||
"cellView": "form" | ||
}, | ||
"source": [ | ||
"#@title Visualize Frames\n", | ||
"\n", | ||
"import os\n", | ||
"import tempfile\n", | ||
"import ipywidgets as widgets\n", | ||
"from IPython.display import display\n", | ||
"import py3Dmol\n", | ||
"import mdtraj as md\n", | ||
"\n", | ||
"# Provide your actual output_dir, or ensure these files exist\n", | ||
"pdb_path = os.path.join(output_dir, \"topology.pdb\")\n", | ||
"xtc_path = os.path.join(output_dir, \"samples.xtc\")\n", | ||
"def frame_to_pdb_string(frame):\n", | ||
"    \"\"\"Return the PDB text of a single-frame mdtraj.Trajectory.\n", | ||
"\n", | ||
"    The frame is written to a temporary .pdb file with `frame.save_pdb`, read back\n", | ||
"    as a string, and the file is removed again even if writing or reading fails.\n", | ||
"    \"\"\"\n", | ||
"    # delete=False: only the unique path is needed; the handle is closed before\n", | ||
"    # save_pdb writes to that path.\n", | ||
"    with tempfile.NamedTemporaryFile(mode='w', suffix='.pdb', delete=False) as tmp:\n", | ||
"        tmp_filename = tmp.name\n", | ||
"    try:\n", | ||
"        frame.save_pdb(tmp_filename)\n", | ||
"        with open(tmp_filename, 'r') as f:\n", | ||
"            return f.read()\n", | ||
"    finally:\n", | ||
"        os.remove(tmp_filename)\n", | ||
"\n", | ||
"# Create the py3Dmol view once\n", | ||
"view = py3Dmol.view(width=600, height=400)\n", | ||
"traj = md.load(xtc_path, top=pdb_path)\n", | ||
"pdb_str = frame_to_pdb_string(traj[:1])\n", | ||
"view.addModel(pdb_str, \"pdb\")\n", | ||
"view.setStyle({\"cartoon\": {\"color\": \"spectrum\"}})\n", | ||
"view.zoomTo()\n", | ||
"view.show() # Show the viewer just once here\n", | ||
"\n", | ||
"# Create the slider\n", | ||
"frame_slider = widgets.IntSlider(\n", | ||
" value=0,\n", | ||
" min=0,\n", | ||
" max=len(traj) - 1,\n", | ||
" step=1,\n", | ||
" description='Frame'\n", | ||
")\n", | ||
"\n", | ||
"def on_frame_change(change):\n", | ||
" \"\"\"Update the existing py3Dmol view in place when the slider changes.\"\"\"\n", | ||
" frame_idx = change[\"new\"]\n", | ||
" pdb_str = frame_to_pdb_string(traj[frame_idx:frame_idx+1])\n", | ||
"\n", | ||
" view.removeAllModels()\n", | ||
" view.addModel(pdb_str, \"pdb\")\n", | ||
" view.setStyle({\"cartoon\": {\"color\": \"spectrum\"}})\n", | ||
" view.zoomTo()\n", | ||
" # Force an update of the existing view instead of creating a new cell output\n", | ||
" view.update()\n", | ||
"\n", | ||
"frame_slider.observe(on_frame_change, names=\"value\")\n", | ||
"display(frame_slider)\n" | ||
], | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"id": "sidechain_run", | ||
"cellView": "form" | ||
}, | ||
"source": [ | ||
"#@title Run Side-Chain Reconstruction\n", | ||
"import os\n", | ||
"import subprocess, sys\n", | ||
"\n", | ||
"# Runs only when the form option is enabled and the sampling outputs exist.\n", | ||
"if side_chain_reconstruction:\n", | ||
"    pdb_path = os.path.join(output_dir, \"topology.pdb\")\n", | ||
"    xtc_path = os.path.join(output_dir, \"samples.xtc\")\n", | ||
"\n", | ||
"    if not (os.path.isfile(pdb_path) and os.path.isfile(xtc_path)):\n", | ||
"        print(\"Could not find PDB/XTC for side-chain reconstruction.\")\n", | ||
"    else:\n", | ||
"        cmd_sidechain = [\n", | ||
"            sys.executable, \"-m\", \"bioemu.sidechain_relax\",\n", | ||
"            \"--pdb-path\", pdb_path,\n", | ||
"            \"--xtc-path\", xtc_path,\n", | ||
"            \"--no-md-equil\" # remove this arg if you want a short MD equilibration\n", | ||
"        ]\n", | ||
"        print(\"Running side-chain reconstruction:\\n\", \" \".join(cmd_sidechain))\n", | ||
"        # check=True raises CalledProcessError on a non-zero exit status, so a\n", | ||
"        # failed reconstruction is not reported as complete.\n", | ||
"        subprocess.run(cmd_sidechain, check=True)\n", | ||
"        print(\"\\nSide-chain reconstruction complete! Check 'samples_sidechain_rec.pdb/xtc'.\")" | ||
], | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"metadata": { | ||
"id": "zip_download", | ||
"cellView": "form" | ||
}, | ||
"source": [ | ||
"#@title Zip and Download All Results\n", | ||
"from google.colab import files\n", | ||
"\n", | ||
"zip_name = \"bioemu_samples.zip\"\n", | ||
"# Quote both expansions so an output_dir containing spaces reaches zip as one argument.\n", | ||
"!zip -r \"$zip_name\" \"$output_dir\"\n", | ||
"files.download(zip_name)" | ||
], | ||
"execution_count": null, | ||
"outputs": [] | ||
} | ||
] | ||
} |