Skip to content

Commit

Permalink
First version of BioEmu notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
martin-steinegger committed Feb 23, 2025
1 parent 406d4c6 commit cc06daf
Showing 1 changed file with 271 additions and 0 deletions.
271 changes: 271 additions & 0 deletions BioEmu.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"machine_shape": "hm",
"gpuType": "A100",
"provenance": [],
"include_colab_link": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/sokrypton/ColabFold/blob/main/BioEmu.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "bioemu_title"
},
"source": [
"# **Biomolecular Emulator (BioEmu) in ColabFold**\n",
"<img src=\"https://github.com/microsoft/bioemu/raw/main/assets/emu.png\" height=\"130\" align=\"right\" style=\"height:240px\">\n",
"\n",
"[BioEmu](https://github.com/microsoft/bioemu) is a framework for emulating biomolecular dynamics and integrating structural prediction tools to accelerate research in structural biology and protein engineering. This notebook builds uses ColabFold to generate the MSA.\n",
"\n",
"\n",
"\n",
"For more details, please read the [BioEmu Preprint](https://www.biorxiv.org/content/10.1101/2024.12.05.626885v1.abstract).\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "user_input_code",
"cellView": "form"
},
"source": [
"#@title **Enter Your Protein Sequence & Sampling Parameters**\n",
"sequence = \"GYDPETGTWG\" #@param {type:\"string\"}\n",
"num_samples = 10 #@param {type:\"integer\"}\n",
"output_dir = \"/content/test-chignolin\" #@param {type:\"string\"}\n",
"side_chain_reconstruction = True #@param {type:\"boolean\"}\n",
"\n",
"print(f\"Sequence: {sequence}\\nNumber of samples: {num_samples}\\nOutput directory: {output_dir}\")"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "install_code",
"cellView": "form"
},
"source": [
"#@title Install\n",
"%%time\n",
"import sys\n",
"import os\n",
"\n",
"\n",
"print(\"Cloning BioEmu...\")\n",
"!git clone -q https://github.com/microsoft/bioemu.git\n",
"\n",
"if not os.path.isfile(\"COLABFOLD_READY\"):\n",
" print(\"installing colabfold...\")\n",
" os.system(\"pip install -q --no-warn-conflicts 'colabfold[alphafold-minus-jax] @ git+https://github.com/sokrypton/ColabFold'\")\n",
" if os.environ.get('TPU_NAME', False) != False:\n",
" os.system(\"pip uninstall -y jax jaxlib\")\n",
" os.system(\"pip install --no-warn-conflicts --upgrade dm-haiku==0.0.10 'jax[cuda12_pip]'==0.3.25 -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html\")\n",
" os.system(\"ln -s /usr/local/lib/python3.*/dist-packages/colabfold colabfold\")\n",
" os.system(\"ln -s /usr/local/lib/python3.*/dist-packages/alphafold alphafold\")\n",
" os.system(\"touch COLABFOLD_READY\")\n",
"\n",
"print(\"Patching colabfold installation...\")\n",
"os.system(\"patch /usr/local/lib/python3.*/dist-packages/alphafold/model/modules.py bioemu/colabfold_setup/modules.patch\")\n",
"\n",
"patch_text = r\"\"\"\n",
"430d429\n",
"<\n",
"478a478\n",
"> np.save(files.get(\"single_repr_evo\", \"npy\"), result[\"representations_evo\"][\"single\"])\n",
"480a481\n",
"> np.save(files.get(\"pair_repr_evo\", \"npy\"), result[\"representations_evo\"][\"pair\"])\n",
"\"\"\"\n",
"\n",
"with open(\"inline_batch.patch\",\"w\") as f:\n",
" f.write(patch_text)\n",
"\n",
"os.system(\"patch /usr/local/lib/python3.*/dist-packages/colabfold/batch.py inline_batch.patch\")\n",
"\n",
"# Optionally install side-chain reconstruction tools (HPacker)\n",
"if side_chain_reconstruction:\n",
" print(\"Enabling side-chain reconstruction tools (HPacker)...\")\n",
" if not os.path.exists(\"hpacker\"):\n",
" !git clone -q https://github.com/gvisani/hpacker.git\n",
" %cd hpacker\n",
" !pip install -q --no-warn-conflicts .\n",
" %cd ..\n",
" else:\n",
" print(\"HPacker repo already cloned.\")\n",
"else:\n",
" print(\"Side-chain reconstruction not selected. Skipping HPacker installation.\")\n",
"\n",
"# Finally, install the BioEmu package itself.\n",
"if not os.path.isfile(\"BIOEMU_READY\"):\n",
" if not os.path.exists(\"bioemu\"):\n",
" %cd bioemu\n",
" else:\n",
" %cd bioemu\n",
" !pip install -q --no-warn-conflicts \".[md]\"\n",
" os.system(\"touch BIOEMU_READY\")\n",
"\n",
"print(\"Installation complete.\")"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "sampling_code",
"cellView": "form"
},
"source": [
"#@title Run BioEmu Sampling\n",
"import subprocess, sys\n",
"\n",
"cmd = [\n",
" sys.executable, \"-m\", \"bioemu.sample\",\n",
" \"--sequence\", sequence,\n",
" \"--num_samples\", str(num_samples),\n",
" \"--output_dir\", output_dir\n",
"]\n",
"\n",
"print(\"Running command:\\n\", \" \".join(cmd))\n",
"subprocess.run(cmd)\n",
"print(\"\\nSampling complete. Check the\", output_dir, \"directory for PDB and XTC files.\")"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "viz_code",
"cellView": "form"
},
"source": [
"#@title Visualize Frames\n",
"\n",
"import os\n",
"import tempfile\n",
"import ipywidgets as widgets\n",
"from IPython.display import display\n",
"import py3Dmol\n",
"import mdtraj as md\n",
"\n",
"# Provide your actual output_dir, or ensure these files exist\n",
"pdb_path = os.path.join(output_dir, \"topology.pdb\")\n",
"xtc_path = os.path.join(output_dir, \"samples.xtc\")\n",
"def frame_to_pdb_string(frame):\n",
" \"\"\"Write a single-frame mdtraj.Trajectory to a temp PDB file, return as string.\"\"\"\n",
" with tempfile.NamedTemporaryFile(mode='w', suffix='.pdb', delete=False) as tmp:\n",
" tmp_filename = tmp.name\n",
" frame.save_pdb(tmp_filename)\n",
" with open(tmp_filename, 'r') as f:\n",
" pdb_str = f.read()\n",
" os.remove(tmp_filename)\n",
" return pdb_str\n",
"\n",
"# Create the py3Dmol view once\n",
"view = py3Dmol.view(width=600, height=400)\n",
"traj = md.load(xtc_path, top=pdb_path)\n",
"pdb_str = frame_to_pdb_string(traj[:1])\n",
"view.addModel(pdb_str, \"pdb\")\n",
"view.setStyle({\"cartoon\": {\"color\": \"spectrum\"}})\n",
"view.zoomTo()\n",
"view.show() # Show the viewer just once here\n",
"\n",
"# Create the slider\n",
"frame_slider = widgets.IntSlider(\n",
" value=0,\n",
" min=0,\n",
" max=len(traj) - 1,\n",
" step=1,\n",
" description='Frame'\n",
")\n",
"\n",
"def on_frame_change(change):\n",
" \"\"\"Update the existing py3Dmol view in place when the slider changes.\"\"\"\n",
" frame_idx = change[\"new\"]\n",
" pdb_str = frame_to_pdb_string(traj[frame_idx:frame_idx+1])\n",
"\n",
" view.removeAllModels()\n",
" view.addModel(pdb_str, \"pdb\")\n",
" view.setStyle({\"cartoon\": {\"color\": \"spectrum\"}})\n",
" view.zoomTo()\n",
" # Force an update of the existing view instead of creating a new cell output\n",
" view.update()\n",
"\n",
"frame_slider.observe(on_frame_change, names=\"value\")\n",
"display(frame_slider)\n"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "sidechain_run",
"cellView": "form"
},
"source": [
"#@title Run Side-Chain Reconstruction\n",
"import subprocess, sys\n",
"if side_chain_reconstruction:\n",
" pdb_path = os.path.join(output_dir, \"topology.pdb\")\n",
" xtc_path = os.path.join(output_dir, \"samples.xtc\")\n",
"\n",
" if not (os.path.isfile(pdb_path) and os.path.isfile(xtc_path)):\n",
" print(\"Could not find PDB/XTC for side-chain reconstruction.\")\n",
" else:\n",
" cmd_sidechain = [\n",
" sys.executable, \"-m\", \"bioemu.sidechain_relax\",\n",
" \"--pdb-path\", pdb_path,\n",
" \"--xtc-path\", xtc_path,\n",
" \"--no-md-equil\" # remove this arg if you want a short MD equilibration\n",
" ]\n",
" print(\"Running side-chain reconstruction:\\n\", \" \".join(cmd_sidechain))\n",
" subprocess.run(cmd_sidechain)\n",
" print(\"\\nSide-chain reconstruction complete! Check 'samples_sidechain_rec.pdb/xtc'.\")"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "zip_download",
"cellView": "form"
},
"source": [
"#@title Zip and Download All Results\n",
"from google.colab import files\n",
"\n",
"zip_name = \"bioemu_samples.zip\"\n",
"!zip -r $zip_name $output_dir\n",
"files.download(zip_name)"
],
"execution_count": null,
"outputs": []
}
]
}

0 comments on commit cc06daf

Please sign in to comment.