HansmannLab · emjbishop · Aug 16, 2022 · Aug 16, 2022 · Aug 17, 2022 · Aug 17, 2022
diff --git a/01_extract_wells.py b/01_extract_wells.py
@@ -53,26 +53,26 @@
 # parse fasta file line by line
 for line in seq:
     if set(line) <= DNA:
-	read_number+=1
+        read_number+=1
         # print out statistics of progress
-	print '\rAssigned', assigned, '/', read_number, 'TCR: ', tcr_count, 'BCR: ', bcr_count,
+        print('\rAssigned', assigned, '/', read_number, 'TCR: ', tcr_count, 'BCR: ', bcr_count)
         line=line.replace('\n','')
-        # T cell receptor read found
-	if line[2:7] in TCR_plate_barcodes and line[9:14] in TCR_row_barcodes and line[len(line)-7:-2] in TCR_column_barcodes and \
-                not 'N' in line and not 'AAAAAAAAAAAA' in line and not 'GGGGGGGGGGGG' in line and not 'CCCCCCCCCCCC' in line and not 'TTTTTTTTTTTT' in line:
-		location=str(TCR_plate_barcodes.index(line[2:7])+1) + str(chr(ord('A')+TCR_row_barcodes.index(line[9:14]))) + str(TCR_column_barcodes.index(line[len(line)-7:-2])+1)
-		if location in t_ass_seqs:
-			t_ass_seqs[location].append(revcompl(line[14:].replace('\n','')))
-		else:
-			t_ass_seqs[location]=[revcompl(line[14:].replace('\n', ''))]
-		tcr_count+=1
-		assigned+=1
-        # sequence not assigned
+    # T cell receptor read found
+    if line[2:7] in TCR_plate_barcodes and line[9:14] in TCR_row_barcodes and line[len(line)-7:-2] in TCR_column_barcodes and \
+            not 'N' in line and not 'AAAAAAAAAAAA' in line and not 'GGGGGGGGGGGG' in line and not 'CCCCCCCCCCCC' in line and not 'TTTTTTTTTTTT' in line:
+        location=str(TCR_plate_barcodes.index(line[2:7])+1) + str(chr(ord('A')+TCR_row_barcodes.index(line[9:14]))) + str(TCR_column_barcodes.index(line[len(line)-7:-2])+1)
+        if location in t_ass_seqs:
+            t_ass_seqs[location].append(revcompl(line[14:].replace('\n','')))
         else:
-                unassigned+=1
+            t_ass_seqs[location]=[revcompl(line[14:].replace('\n', ''))]
+        tcr_count+=1
+        assigned+=1
+    # sequence not assigned
+    else:
+        unassigned+=1
 
 # statistics of assigned and unassigned sequences
-print 'Unassigned: ', unassigned, ' Assigned: ', assigned
+print('Unassigned: ', unassigned, ' Assigned: ', assigned)
 seq.close()
 
 # write sequences to files

diff --git a/02_generate_IMGT_input_cytokine_output.py b/02_generate_IMGT_input_cytokine_output.py
@@ -21,39 +21,39 @@
 
 # sub-thread for processing one file
 def parse_file(filename, blast_cytokines):
-        # if option for blasting each read to identify cytokine reads is set
-        # generate temporary file without cytokine reads and proceed with it
-        if blast_cytokines:
-            tmp_file, cytokine_list = CdrExtraction.FileWithoutCytokines(filename)
-            possible_TCR_list, empty_cytokine_list = CdrExtraction.ParseWell(tmp_file)
-            os.unlink(tmp_file) 
-        else:
-	    possible_TCR_list, cytokine_list = CdrExtraction.ParseWell(filename)
-
+    # if option for blasting each read to identify cytokine reads is set
+    # generate temporary file without cytokine reads and proceed with it
+    if blast_cytokines:
+        tmp_file, cytokine_list = CdrExtraction.FileWithoutCytokines(filename)
+        possible_TCR_list, empty_cytokine_list = CdrExtraction.ParseWell(tmp_file)
+        os.unlink(tmp_file) 
+    else:
+        possible_TCR_list, cytokine_list = CdrExtraction.ParseWell(filename)
         out_imgt.write(CdrExtraction.HighV_QuestInput(filename.split('.')[0], possible_TCR_list))
-	out_cytokine.write(CdrExtraction.CytokineOutput(filename.split('.')[0], cytokine_list))
+
+    out_cytokine.write(CdrExtraction.CytokineOutput(filename.split('.')[0], cytokine_list))
 
 # main thread
 if __name__ == '__main__':
-	# parsing arguments
-	parser = argparse.ArgumentParser(description='Process files containing sequencing reads.')
-	parser.add_argument('--imgt_input', required=True, help='File that will contain input for IMGT High/V-Quest')
-	parser.add_argument('--cytokine_output', required=True, help='File that will contain output of cytokine reads')
-        parser.add_argument('-b','--blast_cytokines', help='Blast each read to identify cytokine reads', action='store_true')
-	args = parser.parse_args()
-
-	# files to be written
-	out_imgt = open(args.imgt_input, 'w',0)
-	out_cytokine = open(args.cytokine_output, 'w',0)
-	out_cytokine.write ('Well\t' + '\t'.join(CdrExtractionOptions.CYTOKINE_LIST.keys()) + '\n')
-
-	# starting sub-threads
-	pool = multiprocessing.Pool()
-	for filename in sorted(glob.glob('*.fasta')):
-            pool.apply_async(parse_file, args=(filename, args.blast_cytokines))
-
-	# clean up
-	pool.close()
-	pool.join()
-	out_imgt.close()
-	out_cytokine.close()
+    # parsing arguments
+    parser = argparse.ArgumentParser(description='Process files containing sequencing reads.')
+    parser.add_argument('--imgt_input', required=True, help='File that will contain input for IMGT High/V-Quest')
+    parser.add_argument('--cytokine_output', required=True, help='File that will contain output of cytokine reads')
+    parser.add_argument('-b','--blast_cytokines', help='Blast each read to identify cytokine reads', action='store_true')
+    args = parser.parse_args()
+
+    # files to be written
+    out_imgt = open(args.imgt_input, 'w')
+    out_cytokine = open(args.cytokine_output, 'w')
+    out_cytokine.write ('Well\t' + '\t'.join(CdrExtractionOptions.CYTOKINE_LIST.keys()) + '\n')
+
+    # starting sub-threads
+    pool = multiprocessing.Pool()
+    for filename in sorted(glob.glob('*.fasta')):
+        pool.apply_async(parse_file, args=(filename, args.blast_cytokines))
+
+    # clean up
+    pool.close()
+    pool.join()
+    out_imgt.close()
+    out_cytokine.close()
diff --git a/CdrExtraction.py b/CdrExtraction.py
@@ -13,7 +13,7 @@
 from Bio.Blast.Applications import NcbiblastnCommandline
 from Bio.Blast import NCBIXML
 from Bio.Seq import Seq
-from cStringIO import StringIO
+from io import StringIO
 
 # extract cytokine from read
 def CytokineExtraction(SEQ, DB):
@@ -42,27 +42,27 @@ def CytokineOutput(wellname, cytokine_list):
 
 # generate file without cytokine reads and return temporary file plus cytokine list
 def FileWithoutCytokines(filename):
-       	# generate temporary file
-	tmp_file = tempfile.NamedTemporaryFile(delete=False)
-	cytokine_list = copy.deepcopy(CdrExtractionOptions.CYTOKINE_LIST)
-
-        # read in all reads from sequence file 
-        sequences = ConsensusClusters.ReadSequences(filename)
-
-        # go through all reads
-        for s in sequences:
-            # check if read contains cytokine and count it
-            cytokine = CytokineExtraction(sequences[s], CdrExtractionOptions.PATH_TO_CYTOKINE_DB)
-            if cytokine != '':
-                cytokine_list[cytokine]+=1
-            # if it does not contain cytokine, write to file
-            else:   
-                tmp_file.write('>' + s + '\n' + sequences[s] + '\n')
+    # generate temporary file
+    tmp_file = tempfile.NamedTemporaryFile(delete=False)
+    cytokine_list = copy.deepcopy(CdrExtractionOptions.CYTOKINE_LIST)
+
+    # read in all reads from sequence file 
+    sequences = ConsensusClusters.ReadSequences(filename)
 
-        # close temporary file
-       	tmp_file.close()
-
-        return tmp_file.name, cytokine_list
+    # go through all reads
+    for s in sequences:
+        # check if read contains cytokine and count it
+        cytokine = CytokineExtraction(sequences[s], CdrExtractionOptions.PATH_TO_CYTOKINE_DB)
+        if cytokine != '':
+            cytokine_list[cytokine]+=1
+        # if it does not contain cytokine, write to file
+        else:   
+            tmp_file.write('>' + s + '\n' + sequences[s] + '\n')
+
+    # close temporary file
+    tmp_file.close()
+
+    return tmp_file.name, cytokine_list
 
 # generate input for IMGT HighV-Quest
 # >wellname:index:number of reads