-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit cfce8f7
Showing
3 changed files
with
73 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import pandas as pd | ||
import matplotlib.pyplot as plt | ||
import numpy as np | ||
|
||
|
||
residues = ["417","445", "449","484","501"] | ||
flex_wild = [0.994, 4.588, 2.988, 2.130,2.373] | ||
flex_omikron = [0.797, 2.503, 1.354, 1.523, 1.134] | ||
|
||
fig, ax = plt.subplots() | ||
|
||
bar_width = 0.25 | ||
x1_positions = np.arange(0.375, len(residues), 1) | ||
x2_positions = np.arange(0.375, len(residues), 1) | ||
x3_positions = [x + bar_width for x in x2_positions] | ||
|
||
ax.bar(x2_positions, flex_wild, width=bar_width, label='Wildtype') | ||
ax.bar(x3_positions, flex_omikron, width=bar_width, label='B.1.1.529') | ||
ax.set_xticks([x + bar_width for x in x1_positions]) | ||
ax.set_xticklabels(residues) | ||
plt.legend() | ||
plt.xlabel("Residues") | ||
plt.ylabel("RMSF") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
from Bio import SeqIO | ||
|
||
# Open the FASTA file and parse the nucleotide sequence | ||
|
||
|
||
|
||
|
||
### find orf and | ||
def find_orf(scaffolds, start_pos = 21562, end_pos = 25384): | ||
with open(scaffolds, "r") as handle: | ||
record = SeqIO.read(handle, "fasta") | ||
nucleotide_seq = record.seq | ||
s_gene_start = start_pos # Python uses 0-based indexing | ||
s_gene_end = end_pos | ||
|
||
# Find the closest ATG near the annotated start position | ||
start_codon = "ATG" | ||
search_start = max(0, s_gene_start - 50) | ||
search_end = s_gene_start + 50 | ||
|
||
closest_atg_pos = -1 | ||
min_distance = float('inf') | ||
|
||
for pos in range(search_start, search_end): | ||
if record.seq[pos:pos+3] == start_codon: | ||
distance = abs(s_gene_start - pos) | ||
if distance < min_distance: | ||
closest_atg_pos = pos | ||
min_distance = distance | ||
|
||
if closest_atg_pos != -1: | ||
adjusted_s_gene_start = closest_atg_pos | ||
adjusted_s_gene_sequence = record.seq[adjusted_s_gene_start:s_gene_end] | ||
adjusted_spike_protein_sequence = adjusted_s_gene_sequence.translate() | ||
print("Adjusted spike glycoprotein sequence:") | ||
print(adjusted_spike_protein_sequence) | ||
else: | ||
print("No start codon found near the annotated S gene start position.") | ||
return adjusted_spike_protein_sequence | ||
|
||
|
||
def save_protein_fasta(protein_sequence, output_file, header=">protein"): | ||
with open(output_file, "w") as f: | ||
f.write(header + "\n") | ||
f.write(str(protein_sequence) + "\n") | ||
|
||
# Replace with the path where you want to save the file | ||
output_file = "spike_glycoprotein.fasta" | ||
adjusted_spike_protein_sequence = find_orf(scaffolds) | ||
save_protein_fasta(adjusted_spike_protein_sequence, output_file) |
Empty file.