Skip to content

Commit

Permalink
Add DNA support
Browse files Browse the repository at this point in the history
  • Loading branch information
martin-steinegger committed Nov 17, 2024
1 parent 999a9e0 commit 79244d9
Showing 1 changed file with 20 additions and 4 deletions.
24 changes: 20 additions & 4 deletions boltz1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"provenance": [],
"machine_shape": "hm",
"gpuType": "A100",
"authorship_tag": "ABX9TyNTASXa0TPusw2Bq/ltniMh",
"authorship_tag": "ABX9TyP7Wu7UlNfSKmSCprzBFVVC",
"include_colab_link": true
},
"kernelspec": {
Expand Down Expand Up @@ -64,12 +64,15 @@
"#@markdown - Use `:` to specify multiple ligands as SMILES strings\n",
"ligand_input_ccd = 'SAH' #@param {type:\"string\"}\n",
"#@markdown - Use `:` to specify multiple ligands as CCD codes (three-letter codes)\n",
"dna_input = '' #@param {type:\"string\"}\n",
"#@markdown - Use `:` to specify multiple DNA sequences\n",
"jobname = 'test' #@param {type:\"string\"}\n",
"\n",
"# Strip all whitespace from the sequence, ligand and DNA inputs and sanitize the jobname\n",
"query_sequence = \"\".join(query_sequence.split())\n",
"ligand_input = \"\".join(ligand_input.split())\n",
"ligand_input_ccd = \"\".join(ligand_input_ccd.split())\n",
"dna_input = \"\".join(dna_input.split())\n",
"basejobname = \"\".join(jobname.split())\n",
"basejobname = re.sub(r'\\W+', '', basejobname)\n",
"jobname = add_hash(basejobname, query_sequence)\n",
Expand All @@ -90,9 +93,11 @@
"from string import ascii_uppercase\n",
"\n",
"# Split sequences on chain breaks\n",
"protein_sequences = query_sequence.strip().split(':')\n",
"ligand_sequences = ligand_input.strip().split(':')\n",
"ligand_sequences_ccd = ligand_input_ccd.strip().split(':')\n",
"protein_sequences = query_sequence.strip().split(':') if query_sequence.strip() else []\n",
"ligand_sequences = ligand_input.strip().split(':') if ligand_input.strip() else []\n",
"ligand_sequences_ccd = ligand_input_ccd.strip().split(':') if ligand_input_ccd.strip() else []\n",
"dna_sequences = dna_input.strip().split(':') if dna_input.strip() else []\n",
"\n",
"# Initialize chain labels starting from 'A'\n",
"chain_labels = iter(ascii_uppercase)\n",
"\n",
Expand Down Expand Up @@ -127,6 +132,17 @@
" sequence = lig\n",
" fasta_entries.append((header, sequence))\n",
"\n",
"# Process DNA sequences (NO MSA is generated)\n",
"for seq in dna_sequences:\n",
" seq = seq.strip()\n",
" if not seq:\n",
" continue # Skip empty sequences\n",
" chain_label = next(chain_labels)\n",
" lig_type = 'DNA'\n",
" header = f\">{chain_label}|{lig_type}\"\n",
" sequence = seq\n",
" fasta_entries.append((header, sequence))\n",
"\n",
"# Process ligand sequences (CCD codes)\n",
"for lig in ligand_sequences_ccd:\n",
" lig = lig.strip()\n",
Expand Down

0 comments on commit 79244d9

Please sign in to comment.