Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Syntax fixes #1

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 15 additions & 15 deletions 01_extract_wells.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,26 +53,26 @@
# parse fasta file line by line
for line in seq:
if set(line) <= DNA:
read_number+=1
read_number+=1
# print out statistics of progress
print '\rAssigned', assigned, '/', read_number, 'TCR: ', tcr_count, 'BCR: ', bcr_count,
print('\rAssigned', assigned, '/', read_number, 'TCR: ', tcr_count, 'BCR: ', bcr_count)
line=line.replace('\n','')
# T cell receptor read found
if line[2:7] in TCR_plate_barcodes and line[9:14] in TCR_row_barcodes and line[len(line)-7:-2] in TCR_column_barcodes and \
not 'N' in line and not 'AAAAAAAAAAAA' in line and not 'GGGGGGGGGGGG' in line and not 'CCCCCCCCCCCC' in line and not 'TTTTTTTTTTTT' in line:
location=str(TCR_plate_barcodes.index(line[2:7])+1) + str(chr(ord('A')+TCR_row_barcodes.index(line[9:14]))) + str(TCR_column_barcodes.index(line[len(line)-7:-2])+1)
if location in t_ass_seqs:
t_ass_seqs[location].append(revcompl(line[14:].replace('\n','')))
else:
t_ass_seqs[location]=[revcompl(line[14:].replace('\n', ''))]
tcr_count+=1
assigned+=1
# sequence not assigned
# T cell receptor read found
if line[2:7] in TCR_plate_barcodes and line[9:14] in TCR_row_barcodes and line[len(line)-7:-2] in TCR_column_barcodes and \
not 'N' in line and not 'AAAAAAAAAAAA' in line and not 'GGGGGGGGGGGG' in line and not 'CCCCCCCCCCCC' in line and not 'TTTTTTTTTTTT' in line:
location=str(TCR_plate_barcodes.index(line[2:7])+1) + str(chr(ord('A')+TCR_row_barcodes.index(line[9:14]))) + str(TCR_column_barcodes.index(line[len(line)-7:-2])+1)
if location in t_ass_seqs:
t_ass_seqs[location].append(revcompl(line[14:].replace('\n','')))
else:
unassigned+=1
t_ass_seqs[location]=[revcompl(line[14:].replace('\n', ''))]
tcr_count+=1
assigned+=1
# sequence not assigned
else:
unassigned+=1

# statistics of assigned and unassigned sequences
print 'Unassigned: ', unassigned, ' Assigned: ', assigned
print('Unassigned: ', unassigned, ' Assigned: ', assigned)
seq.close()

# write sequences to files
Expand Down
64 changes: 32 additions & 32 deletions 02_generate_IMGT_input_cytokine_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,39 +21,39 @@

# sub-thread for processing one file
def parse_file(filename, blast_cytokines):
# if option for blasting each read to identify cytokine reads is set
# generate temporary file without cytokine reads and proceed with it
if blast_cytokines:
tmp_file, cytokine_list = CdrExtraction.FileWithoutCytokines(filename)
possible_TCR_list, empty_cytokine_list = CdrExtraction.ParseWell(tmp_file)
os.unlink(tmp_file)
else:
possible_TCR_list, cytokine_list = CdrExtraction.ParseWell(filename)

# if option for blasting each read to identify cytokine reads is set
# generate temporary file without cytokine reads and proceed with it
if blast_cytokines:
tmp_file, cytokine_list = CdrExtraction.FileWithoutCytokines(filename)
possible_TCR_list, empty_cytokine_list = CdrExtraction.ParseWell(tmp_file)
os.unlink(tmp_file)
else:
possible_TCR_list, cytokine_list = CdrExtraction.ParseWell(filename)
out_imgt.write(CdrExtraction.HighV_QuestInput(filename.split('.')[0], possible_TCR_list))
out_cytokine.write(CdrExtraction.CytokineOutput(filename.split('.')[0], cytokine_list))

out_cytokine.write(CdrExtraction.CytokineOutput(filename.split('.')[0], cytokine_list))

# main thread
if __name__ == '__main__':
# parsing arguments
parser = argparse.ArgumentParser(description='Process files containing sequencing reads.')
parser.add_argument('--imgt_input', required=True, help='File that will contain input for IMGT High/V-Quest')
parser.add_argument('--cytokine_output', required=True, help='File that will contain output of cytokine reads')
parser.add_argument('-b','--blast_cytokines', help='Blast each read to identify cytokine reads', action='store_true')
args = parser.parse_args()

# files to be written
out_imgt = open(args.imgt_input, 'w',0)
out_cytokine = open(args.cytokine_output, 'w',0)
out_cytokine.write ('Well\t' + '\t'.join(CdrExtractionOptions.CYTOKINE_LIST.keys()) + '\n')

# starting sub-threads
pool = multiprocessing.Pool()
for filename in sorted(glob.glob('*.fasta')):
pool.apply_async(parse_file, args=(filename, args.blast_cytokines))

# clean up
pool.close()
pool.join()
out_imgt.close()
out_cytokine.close()
# parsing arguments
parser = argparse.ArgumentParser(description='Process files containing sequencing reads.')
parser.add_argument('--imgt_input', required=True, help='File that will contain input for IMGT High/V-Quest')
parser.add_argument('--cytokine_output', required=True, help='File that will contain output of cytokine reads')
parser.add_argument('-b','--blast_cytokines', help='Blast each read to identify cytokine reads', action='store_true')
args = parser.parse_args()

# files to be written
out_imgt = open(args.imgt_input, 'w')
out_cytokine = open(args.cytokine_output, 'w')
out_cytokine.write ('Well\t' + '\t'.join(CdrExtractionOptions.CYTOKINE_LIST.keys()) + '\n')

# starting sub-threads
pool = multiprocessing.Pool()
for filename in sorted(glob.glob('*.fasta')):
pool.apply_async(parse_file, args=(filename, args.blast_cytokines))

# clean up
pool.close()
pool.join()
out_imgt.close()
out_cytokine.close()
42 changes: 21 additions & 21 deletions CdrExtraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from Bio.Blast.Applications import NcbiblastnCommandline
from Bio.Blast import NCBIXML
from Bio.Seq import Seq
from cStringIO import StringIO
from io import StringIO

# extract cytokine from read
def CytokineExtraction(SEQ, DB):
Expand Down Expand Up @@ -42,27 +42,27 @@ def CytokineOutput(wellname, cytokine_list):

# generate file without cytokine reads and return temporary file plus cytokine list
def FileWithoutCytokines(filename):
# generate temporary file
tmp_file = tempfile.NamedTemporaryFile(delete=False)
cytokine_list = copy.deepcopy(CdrExtractionOptions.CYTOKINE_LIST)

# read in all reads from sequence file
sequences = ConsensusClusters.ReadSequences(filename)

# go through all reads
for s in sequences:
# check if read contains cytokine and count it
cytokine = CytokineExtraction(sequences[s], CdrExtractionOptions.PATH_TO_CYTOKINE_DB)
if cytokine != '':
cytokine_list[cytokine]+=1
# if it does not contain cytokine, write to file
else:
tmp_file.write('>' + s + '\n' + sequences[s] + '\n')
# generate temporary file
tmp_file = tempfile.NamedTemporaryFile(delete=False)
cytokine_list = copy.deepcopy(CdrExtractionOptions.CYTOKINE_LIST)

# read in all reads from sequence file
sequences = ConsensusClusters.ReadSequences(filename)

# close temporary file
tmp_file.close()

return tmp_file.name, cytokine_list
# go through all reads
for s in sequences:
# check if read contains cytokine and count it
cytokine = CytokineExtraction(sequences[s], CdrExtractionOptions.PATH_TO_CYTOKINE_DB)
if cytokine != '':
cytokine_list[cytokine]+=1
# if it does not contain cytokine, write to file
else:
tmp_file.write('>' + s + '\n' + sequences[s] + '\n')

# close temporary file
tmp_file.close()

return tmp_file.name, cytokine_list

# generate input for IMGT HighV-Quest
# >wellname:index:number of reads
Expand Down