upadte files

nilshof01 · May 10, 2023 · cfce8f7 · cfce8f7
commit cfce8f7
Show file tree

Hide file tree

Showing 3 changed files with 73 additions and 0 deletions.
diff --git a/felxibility_plot.py b/felxibility_plot.py
@@ -0,0 +1,23 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+residues = ["417","445", "449","484","501"]
+flex_wild = [0.994, 4.588, 2.988, 2.130,2.373]
+flex_omikron = [0.797, 2.503, 1.354, 1.523, 1.134]
+
+fig, ax = plt.subplots()
+
+bar_width = 0.25
+x1_positions = np.arange(0.375, len(residues), 1)
+x2_positions = np.arange(0.375, len(residues), 1)
+x3_positions = [x + bar_width for x in x2_positions]
+
+ax.bar(x2_positions, flex_wild, width=bar_width, label='Wildtype')
+ax.bar(x3_positions, flex_omikron, width=bar_width, label='B.1.1.529')
+ax.set_xticks([x + bar_width for x in x1_positions])
+ax.set_xticklabels(residues)
+plt.legend()
+plt.xlabel("Residues")
+plt.ylabel("RMSF")
diff --git a/find_orf_spike_protein.py b/find_orf_spike_protein.py
@@ -0,0 +1,50 @@
+from Bio import SeqIO
+
+# Open the FASTA file and parse the nucleotide sequence
+
+
+
+
+### find orf and
+def find_orf(scaffolds, start_pos = 21562, end_pos = 25384):
+    with open(scaffolds, "r") as handle:
+        record = SeqIO.read(handle, "fasta")
+        nucleotide_seq = record.seq
+    s_gene_start = start_pos # Python uses 0-based indexing
+    s_gene_end = end_pos
+
+    # Find the closest ATG near the annotated start position
+    start_codon = "ATG"
+    search_start = max(0, s_gene_start - 50)
+    search_end = s_gene_start + 50
+
+    closest_atg_pos = -1
+    min_distance = float('inf')
+
+    for pos in range(search_start, search_end):
+        if record.seq[pos:pos+3] == start_codon:
+            distance = abs(s_gene_start - pos)
+            if distance < min_distance:
+                closest_atg_pos = pos
+                min_distance = distance
+
+    if closest_atg_pos != -1:
+        adjusted_s_gene_start = closest_atg_pos
+        adjusted_s_gene_sequence = record.seq[adjusted_s_gene_start:s_gene_end]
+        adjusted_spike_protein_sequence = adjusted_s_gene_sequence.translate()
+        print("Adjusted spike glycoprotein sequence:")
+        print(adjusted_spike_protein_sequence)
+    else:
+        print("No start codon found near the annotated S gene start position.")
+    return adjusted_spike_protein_sequence
+
+
+def save_protein_fasta(protein_sequence, output_file, header=">protein"):
+    with open(output_file, "w") as f:
+        f.write(header + "\n")
+        f.write(str(protein_sequence) + "\n")
+
+# Replace with the path where you want to save the file
+output_file = "spike_glycoprotein.fasta"
+adjusted_spike_protein_sequence = find_orf(scaffolds)
+save_protein_fasta(adjusted_spike_protein_sequence, output_file)
diff --git a/mutatex/__init__.py b/mutatex/__init__.py