From 79244d9a0d20d5200bb61745b1cb892747946853 Mon Sep 17 00:00:00 2001 From: Martin Steinegger Date: Mon, 18 Nov 2024 00:59:17 +0100 Subject: [PATCH] Add DNA support --- boltz1.ipynb | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/boltz1.ipynb b/boltz1.ipynb index 88f21c9a..255a32e6 100644 --- a/boltz1.ipynb +++ b/boltz1.ipynb @@ -6,7 +6,7 @@ "provenance": [], "machine_shape": "hm", "gpuType": "A100", - "authorship_tag": "ABX9TyNTASXa0TPusw2Bq/ltniMh", + "authorship_tag": "ABX9TyP7Wu7UlNfSKmSCprzBFVVC", "include_colab_link": true }, "kernelspec": { @@ -64,12 +64,15 @@ "#@markdown - Use `:` to specify multiple ligands as smile strings\n", "ligand_input_ccd = 'SAH' #@param {type:\"string\"}\n", "#@markdown - Use `:` to specify multiple ligands as CCD codes (three-letter codes)\n", + "dna_input = '' #@param {type:\"string\"}\n", + "#@markdown - Use `:` to specify multiple DNA sequences\n", "jobname = 'test' #@param {type:\"string\"}\n", "\n", "# Clean up the query sequence and jobname\n", "query_sequence = \"\".join(query_sequence.split())\n", "ligand_input = \"\".join(ligand_input.split())\n", "ligand_input_ccd = \"\".join(ligand_input_ccd.split())\n", + "dna_input = \"\".join(dna_input.split())\n", "basejobname = \"\".join(jobname.split())\n", "basejobname = re.sub(r'\\W+', '', basejobname)\n", "jobname = add_hash(basejobname, query_sequence)\n", @@ -90,9 +93,11 @@ "from string import ascii_uppercase\n", "\n", "# Split sequences on chain breaks\n", - "protein_sequences = query_sequence.strip().split(':')\n", - "ligand_sequences = ligand_input.strip().split(':')\n", - "ligand_sequences_ccd = ligand_input_ccd.strip().split(':')\n", + "protein_sequences = query_sequence.strip().split(':') if query_sequence.strip() else []\n", + "ligand_sequences = ligand_input.strip().split(':') if ligand_input.strip() else []\n", + "ligand_sequences_ccd = ligand_input_ccd.strip().split(':') if ligand_input_ccd.strip() else []\n", + "dna_sequences = dna_input.strip().split(':') if dna_input.strip() else []\n", + "\n", "# Initialize chain labels starting from 'A'\n", "chain_labels = iter(ascii_uppercase)\n", "\n", @@ -127,6 +132,17 @@ " sequence = lig\n", " fasta_entries.append((header, sequence))\n", "\n", + "# Process DNA sequences (NO MSA is generated)\n", + "for seq in dna_sequences:\n", + " seq = seq.strip()\n", + " if not seq:\n", + " continue # Skip empty sequences\n", + " chain_label = next(chain_labels)\n", + " lig_type = 'DNA'\n", + " header = f\">{chain_label}|{lig_type}\"\n", + " sequence = seq\n", + " fasta_entries.append((header, sequence))\n", + "\n", "# Process ligand sequences (CCD codes)\n", "for lig in ligand_sequences_ccd:\n", " lig = lig.strip()\n",