Skip to content

Commit

Permalink
upadte files
Browse files Browse the repository at this point in the history
  • Loading branch information
nilshof01 committed May 10, 2023
0 parents commit cfce8f7
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 0 deletions.
23 changes: 23 additions & 0 deletions felxibility_plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


residues = ["417","445", "449","484","501"]
flex_wild = [0.994, 4.588, 2.988, 2.130,2.373]
flex_omikron = [0.797, 2.503, 1.354, 1.523, 1.134]

fig, ax = plt.subplots()

bar_width = 0.25
x1_positions = np.arange(0.375, len(residues), 1)
x2_positions = np.arange(0.375, len(residues), 1)
x3_positions = [x + bar_width for x in x2_positions]

ax.bar(x2_positions, flex_wild, width=bar_width, label='Wildtype')
ax.bar(x3_positions, flex_omikron, width=bar_width, label='B.1.1.529')
ax.set_xticks([x + bar_width for x in x1_positions])
ax.set_xticklabels(residues)
plt.legend()
plt.xlabel("Residues")
plt.ylabel("RMSF")
50 changes: 50 additions & 0 deletions find_orf_spike_protein.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from Bio import SeqIO

# Open the FASTA file and parse the nucleotide sequence




### find orf and
def find_orf(scaffolds, start_pos = 21562, end_pos = 25384):
with open(scaffolds, "r") as handle:
record = SeqIO.read(handle, "fasta")
nucleotide_seq = record.seq
s_gene_start = start_pos # Python uses 0-based indexing
s_gene_end = end_pos

# Find the closest ATG near the annotated start position
start_codon = "ATG"
search_start = max(0, s_gene_start - 50)
search_end = s_gene_start + 50

closest_atg_pos = -1
min_distance = float('inf')

for pos in range(search_start, search_end):
if record.seq[pos:pos+3] == start_codon:
distance = abs(s_gene_start - pos)
if distance < min_distance:
closest_atg_pos = pos
min_distance = distance

if closest_atg_pos != -1:
adjusted_s_gene_start = closest_atg_pos
adjusted_s_gene_sequence = record.seq[adjusted_s_gene_start:s_gene_end]
adjusted_spike_protein_sequence = adjusted_s_gene_sequence.translate()
print("Adjusted spike glycoprotein sequence:")
print(adjusted_spike_protein_sequence)
else:
print("No start codon found near the annotated S gene start position.")
return adjusted_spike_protein_sequence


def save_protein_fasta(protein_sequence, output_file, header=">protein"):
with open(output_file, "w") as f:
f.write(header + "\n")
f.write(str(protein_sequence) + "\n")

# Replace with the path where you want to save the file
output_file = "spike_glycoprotein.fasta"
adjusted_spike_protein_sequence = find_orf(scaffolds)
save_protein_fasta(adjusted_spike_protein_sequence, output_file)
Empty file added mutatex/__init__.py
Empty file.

0 comments on commit cfce8f7

Please sign in to comment.