-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbasic.py
More file actions
109 lines (88 loc) · 3.74 KB
/
basic.py
File metadata and controls
109 lines (88 loc) · 3.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import os
import subprocess
def get_all_136_tetramers():
"""Generate and return all 136 unique tetrameric contexts."""
def reverse_complement(seq):
complement = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}
return ''.join(complement[base] for base in reversed(seq))
bases = ['A', 'T', 'G', 'C']
unique_tetramers = set()
seen_structures = set()
for b1 in bases:
for b2 in bases:
for b3 in bases:
for b4 in bases:
tetramer = b1 + b2 + b3 + b4
rev_comp = reverse_complement(tetramer)
canonical = min(tetramer, rev_comp)
if canonical not in seen_structures:
seen_structures.add(canonical)
unique_tetramers.add(tetramer)
return sorted(unique_tetramers)
def create_tetramer_test_sequence(target_tetramer, length, repeat_count=10):
"""
Create a DNA sequence enriched with the target tetramer.
Args:
target_tetramer: The 4-base sequence to test (e.g., 'ATAG')
length: Total length in base pairs
repeat_count: How many times to insert the tetramer
"""
# Start with a neutral background sequence (alternating GC is common)
# or use the complement of your tetramer to avoid creating unintended contexts
background = 'GC' * (length // 2)
seq_list = list(background[:length])
# Calculate spacing to distribute tetramers evenly
spacing = length // (repeat_count + 1)
# Insert target tetramer at regular intervals
for i in range(repeat_count):
pos = (i + 1) * spacing
if pos + 4 <= length:
seq_list[pos:pos+4] = list(target_tetramer)
# Ensure circularity doesn't create unwanted tetramer at junction
final_seq = ''.join(seq_list)
return final_seq
def run_emdna_optimization(test_case, model='tetrameric', output_dir='results'):
"""
Run emDNA optimization for a single test case.
Args:
test_case: Dict with sequence, linking_number, etc.
model: 'tetrameric' or 'dimeric' (Cohen2017)
output_dir: Where to save results
"""
# Create input file for emDNA
input_file = f"{output_dir}/{test_case['name']}_{model}.inp"
output_file = f"{output_dir}/{test_case['name']}_{model}.out"
# Write sequence and parameters to input file
# (Format depends on emDNA's input specification)
with open(input_file, 'w') as f:
f.write(f"SEQUENCE {test_case['sequence']}\n")
f.write(f"LENGTH {test_case['length']}\n")
f.write(f"LINKING_NUMBER {test_case['linking_number']}\n")
f.write(f"MODEL {model}\n")
# Run emDNA
cmd = f"emDNA -i {input_file} -o {output_file}"
subprocess.run(cmd, shell=True, check=True)
return output_file
def main():
# Parameters matching the student's work
lengths = range(80, 151) # 80-150 bp
linking_numbers_per_length = lambda L: [
L/10.5 - 1, # Negatively supercoiled
L/10.5, # Relaxed
L/10.5 + 1 # Positively supercoiled
]
# Generate all combinations
test_cases = []
for tetramer in get_all_136_tetramers():
for length in lengths:
seq = create_tetramer_test_sequence(tetramer, length)
for lk in linking_numbers_per_length(length):
test_cases.append({
'tetramer': tetramer,
'sequence': seq,
'length': length,
'linking_number': lk,
'name': f"{tetramer}_{length}bp_lk{lk:.1f}"
})
print(f"Total test cases: {len(test_cases)}")
# 136 tetramers × 71 lengths × 3 linking numbers = 28,968 simulations