From c645872a46a833833310dab4ab6d604f03a2f6eb Mon Sep 17 00:00:00 2001
From: Emma Bishop <bishemma1@gmail.com>
Date: Tue, 16 Aug 2022 14:05:23 -0700
Subject: [PATCH 1/5] Use Python3 print syntax

---
 01_extract_wells.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/01_extract_wells.py b/01_extract_wells.py
index 17d1caf..19fb4fa 100644
--- a/01_extract_wells.py
+++ b/01_extract_wells.py
@@ -55,8 +55,8 @@
     if set(line) <= DNA:
 	read_number+=1
         # print out statistics of progress
-	print '\rAssigned', assigned, '/', read_number, 'TCR: ', tcr_count, 'BCR: ', bcr_count,
-        line=line.replace('\n','')
+        print('\rAssigned', assigned, '/', read_number, 'TCR: ', tcr_count, 'BCR: ', bcr_count,
+        line=line.replace('\n',''))
         # T cell receptor read found
 	if line[2:7] in TCR_plate_barcodes and line[9:14] in TCR_row_barcodes and line[len(line)-7:-2] in TCR_column_barcodes and \
                 not 'N' in line and not 'AAAAAAAAAAAA' in line and not 'GGGGGGGGGGGG' in line and not 'CCCCCCCCCCCC' in line and not 'TTTTTTTTTTTT' in line:
@@ -72,7 +72,7 @@
                 unassigned+=1
 
 # statistics of assigned and unassigned sequences
-print 'Unassigned: ', unassigned, ' Assigned: ', assigned
+print('Unassigned: ', unassigned, ' Assigned: ', assigned)
 seq.close()
 
 # write sequences to files

From fc4731c6388532b547cf810c249999dc551d7c11 Mon Sep 17 00:00:00 2001
From: Emma Bishop <bishemma1@gmail.com>
Date: Tue, 16 Aug 2022 14:06:06 -0700
Subject: [PATCH 2/5] Fix indentation

Fixes errors from improper indentation (such as after if statements) and
inconsistent use of tabs and spaces (I used spaces).
---
 01_extract_wells.py                       | 24 +++++-----
 02_generate_IMGT_input_cytokine_output.py | 58 +++++++++++------------
 CdrExtraction.py                          | 40 ++++++++--------
 3 files changed, 61 insertions(+), 61 deletions(-)

diff --git a/01_extract_wells.py b/01_extract_wells.py
index 19fb4fa..03f0fcf 100644
--- a/01_extract_wells.py
+++ b/01_extract_wells.py
@@ -53,23 +53,23 @@
 # parse fasta file line by line
 for line in seq:
     if set(line) <= DNA:
-	read_number+=1
+        read_number+=1
         # print out statistics of progress
         print('\rAssigned', assigned, '/', read_number, 'TCR: ', tcr_count, 'BCR: ', bcr_count,
         line=line.replace('\n',''))
         # T cell receptor read found
-	if line[2:7] in TCR_plate_barcodes and line[9:14] in TCR_row_barcodes and line[len(line)-7:-2] in TCR_column_barcodes and \
-                not 'N' in line and not 'AAAAAAAAAAAA' in line and not 'GGGGGGGGGGGG' in line and not 'CCCCCCCCCCCC' in line and not 'TTTTTTTTTTTT' in line:
-		location=str(TCR_plate_barcodes.index(line[2:7])+1) + str(chr(ord('A')+TCR_row_barcodes.index(line[9:14]))) + str(TCR_column_barcodes.index(line[len(line)-7:-2])+1)
-		if location in t_ass_seqs:
-			t_ass_seqs[location].append(revcompl(line[14:].replace('\n','')))
-		else:
-			t_ass_seqs[location]=[revcompl(line[14:].replace('\n', ''))]
-		tcr_count+=1
-		assigned+=1
-        # sequence not assigned
+    if line[2:7] in TCR_plate_barcodes and line[9:14] in TCR_row_barcodes and line[len(line)-7:-2] in TCR_column_barcodes and \
+            not 'N' in line and not 'AAAAAAAAAAAA' in line and not 'GGGGGGGGGGGG' in line and not 'CCCCCCCCCCCC' in line and not 'TTTTTTTTTTTT' in line:
+        location=str(TCR_plate_barcodes.index(line[2:7])+1) + str(chr(ord('A')+TCR_row_barcodes.index(line[9:14]))) + str(TCR_column_barcodes.index(line[len(line)-7:-2])+1)
+        if location in t_ass_seqs:
+            t_ass_seqs[location].append(revcompl(line[14:].replace('\n','')))
         else:
-                unassigned+=1
+            t_ass_seqs[location]=[revcompl(line[14:].replace('\n', ''))]
+        tcr_count+=1
+        assigned+=1
+    # sequence not assigned
+    else:
+        unassigned+=1
 
 # statistics of assigned and unassigned sequences
 print('Unassigned: ', unassigned, ' Assigned: ', assigned)
diff --git a/02_generate_IMGT_input_cytokine_output.py b/02_generate_IMGT_input_cytokine_output.py
index d579362..0f7daa8 100644
--- a/02_generate_IMGT_input_cytokine_output.py
+++ b/02_generate_IMGT_input_cytokine_output.py
@@ -21,39 +21,39 @@
 
 # sub-thread for processing one file
 def parse_file(filename, blast_cytokines):
-        # if option for blasting each read to identify cytokine reads is set
-        # generate temporary file without cytokine reads and proceed with it
-        if blast_cytokines:
-            tmp_file, cytokine_list = CdrExtraction.FileWithoutCytokines(filename)
-            possible_TCR_list, empty_cytokine_list = CdrExtraction.ParseWell(tmp_file)
-            os.unlink(tmp_file) 
-        else:
-	    possible_TCR_list, cytokine_list = CdrExtraction.ParseWell(filename)
+    # if option for blasting each read to identify cytokine reads is set
+    # generate temporary file without cytokine reads and proceed with it
+    if blast_cytokines:
+        tmp_file, cytokine_list = CdrExtraction.FileWithoutCytokines(filename)
+        possible_TCR_list, empty_cytokine_list = CdrExtraction.ParseWell(tmp_file)
+        os.unlink(tmp_file) 
+    else:
+        possible_TCR_list, cytokine_list = CdrExtraction.ParseWell(filename)
 
         out_imgt.write(CdrExtraction.HighV_QuestInput(filename.split('.')[0], possible_TCR_list))
-	out_cytokine.write(CdrExtraction.CytokineOutput(filename.split('.')[0], cytokine_list))
+    out_cytokine.write(CdrExtraction.CytokineOutput(filename.split('.')[0], cytokine_list))
 
 # main thread
 if __name__ == '__main__':
-	# parsing arguments
-	parser = argparse.ArgumentParser(description='Process files containing sequencing reads.')
-	parser.add_argument('--imgt_input', required=True, help='File that will contain input for IMGT High/V-Quest')
-	parser.add_argument('--cytokine_output', required=True, help='File that will contain output of cytokine reads')
-        parser.add_argument('-b','--blast_cytokines', help='Blast each read to identify cytokine reads', action='store_true')
-	args = parser.parse_args()
-
-	# files to be written
-	out_imgt = open(args.imgt_input, 'w',0)
-	out_cytokine = open(args.cytokine_output, 'w',0)
-	out_cytokine.write ('Well\t' + '\t'.join(CdrExtractionOptions.CYTOKINE_LIST.keys()) + '\n')
-
-	# starting sub-threads
-	pool = multiprocessing.Pool()
-	for filename in sorted(glob.glob('*.fasta')):
+    # parsing arguments
+    parser = argparse.ArgumentParser(description='Process files containing sequencing reads.')
+    parser.add_argument('--imgt_input', required=True, help='File that will contain input for IMGT High/V-Quest')
+    parser.add_argument('--cytokine_output', required=True, help='File that will contain output of cytokine reads')
+    parser.add_argument('-b','--blast_cytokines', help='Blast each read to identify cytokine reads', action='store_true')
+    args = parser.parse_args()
+
+    # files to be written
+    out_imgt = open(args.imgt_input, 'w',0)
+    out_cytokine = open(args.cytokine_output, 'w',0)
+    out_cytokine.write ('Well\t' + '\t'.join(CdrExtractionOptions.CYTOKINE_LIST.keys()) + '\n')
+
+    # starting sub-threads
+    pool = multiprocessing.Pool()
+    for filename in sorted(glob.glob('*.fasta')):
             pool.apply_async(parse_file, args=(filename, args.blast_cytokines))
 
-	# clean up
-	pool.close()
-	pool.join()
-	out_imgt.close()
-	out_cytokine.close()
+    # clean up
+    pool.close()
+    pool.join()
+    out_imgt.close()
+    out_cytokine.close()
diff --git a/CdrExtraction.py b/CdrExtraction.py
index 03f91a1..9f9c619 100644
--- a/CdrExtraction.py
+++ b/CdrExtraction.py
@@ -42,27 +42,27 @@ def CytokineOutput(wellname, cytokine_list):
 
 # generate file without cytokine reads and return temporary file plus cytokine list
 def FileWithoutCytokines(filename):
-       	# generate temporary file
-	tmp_file = tempfile.NamedTemporaryFile(delete=False)
-	cytokine_list = copy.deepcopy(CdrExtractionOptions.CYTOKINE_LIST)
-        
-        # read in all reads from sequence file 
-        sequences = ConsensusClusters.ReadSequences(filename)
-        
-        # go through all reads
-        for s in sequences:
-            # check if read contains cytokine and count it
-            cytokine = CytokineExtraction(sequences[s], CdrExtractionOptions.PATH_TO_CYTOKINE_DB)
-            if cytokine != '':
-                cytokine_list[cytokine]+=1
-            # if it does not contain cytokine, write to file
-            else:   
-                tmp_file.write('>' + s + '\n' + sequences[s] + '\n')
+    # generate temporary file
+    tmp_file = tempfile.NamedTemporaryFile(delete=False)
+    cytokine_list = copy.deepcopy(CdrExtractionOptions.CYTOKINE_LIST)
+
+    # read in all reads from sequence file 
+    sequences = ConsensusClusters.ReadSequences(filename)
         
-        # close temporary file
-       	tmp_file.close()
- 
-        return tmp_file.name, cytokine_list
+    # go through all reads
+    for s in sequences:
+        # check if read contains cytokine and count it
+        cytokine = CytokineExtraction(sequences[s], CdrExtractionOptions.PATH_TO_CYTOKINE_DB)
+        if cytokine != '':
+            cytokine_list[cytokine]+=1
+        # if it does not contain cytokine, write to file
+        else:   
+            tmp_file.write('>' + s + '\n' + sequences[s] + '\n')
+
+    # close temporary file
+    tmp_file.close()
+
+    return tmp_file.name, cytokine_list
 
 # generate input for IMGT HighV-Quest
 # >wellname:index:number of reads

From af01615d3b7888bdb0db6d75bc181b8454f276d4 Mon Sep 17 00:00:00 2001
From: Emma Bishop <bishemma1@gmail.com>
Date: Wed, 17 Aug 2022 14:56:39 -0700
Subject: [PATCH 3/5] Fix print statement

When adding parentheses, I accidentally wrapped the line that follows the
print statement inside the print call. Move it back out.
---
 01_extract_wells.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/01_extract_wells.py b/01_extract_wells.py
index 03f0fcf..89862f5 100644
--- a/01_extract_wells.py
+++ b/01_extract_wells.py
@@ -55,8 +55,8 @@
     if set(line) <= DNA:
         read_number+=1
         # print out statistics of progress
-        print('\rAssigned', assigned, '/', read_number, 'TCR: ', tcr_count, 'BCR: ', bcr_count,
-        line=line.replace('\n',''))
+        print('\rAssigned', assigned, '/', read_number, 'TCR: ', tcr_count, 'BCR: ', bcr_count)
+        line=line.replace('\n','')
         # T cell receptor read found
     if line[2:7] in TCR_plate_barcodes and line[9:14] in TCR_row_barcodes and line[len(line)-7:-2] in TCR_column_barcodes and \
             not 'N' in line and not 'AAAAAAAAAAAA' in line and not 'GGGGGGGGGGGG' in line and not 'CCCCCCCCCCCC' in line and not 'TTTTTTTTTTTT' in line:

From 77b7ab84d6e88b829da023ffc8fe265aea1dbd4e Mon Sep 17 00:00:00 2001
From: Emma Bishop <bishemma1@gmail.com>
Date: Wed, 17 Aug 2022 14:58:57 -0700
Subject: [PATCH 4/5] Tweak spacing a little more

---
 01_extract_wells.py                       | 2 +-
 02_generate_IMGT_input_cytokine_output.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/01_extract_wells.py b/01_extract_wells.py
index 89862f5..02ae480 100644
--- a/01_extract_wells.py
+++ b/01_extract_wells.py
@@ -57,7 +57,7 @@
         # print out statistics of progress
         print('\rAssigned', assigned, '/', read_number, 'TCR: ', tcr_count, 'BCR: ', bcr_count)
         line=line.replace('\n','')
-        # T cell receptor read found
+    # T cell receptor read found
     if line[2:7] in TCR_plate_barcodes and line[9:14] in TCR_row_barcodes and line[len(line)-7:-2] in TCR_column_barcodes and \
             not 'N' in line and not 'AAAAAAAAAAAA' in line and not 'GGGGGGGGGGGG' in line and not 'CCCCCCCCCCCC' in line and not 'TTTTTTTTTTTT' in line:
         location=str(TCR_plate_barcodes.index(line[2:7])+1) + str(chr(ord('A')+TCR_row_barcodes.index(line[9:14]))) + str(TCR_column_barcodes.index(line[len(line)-7:-2])+1)
diff --git a/02_generate_IMGT_input_cytokine_output.py b/02_generate_IMGT_input_cytokine_output.py
index 0f7daa8..9d1cbdb 100644
--- a/02_generate_IMGT_input_cytokine_output.py
+++ b/02_generate_IMGT_input_cytokine_output.py
@@ -29,8 +29,8 @@ def parse_file(filename, blast_cytokines):
         os.unlink(tmp_file) 
     else:
         possible_TCR_list, cytokine_list = CdrExtraction.ParseWell(filename)
-
         out_imgt.write(CdrExtraction.HighV_QuestInput(filename.split('.')[0], possible_TCR_list))
+
     out_cytokine.write(CdrExtraction.CytokineOutput(filename.split('.')[0], cytokine_list))
 
 # main thread
@@ -50,7 +50,7 @@ def parse_file(filename, blast_cytokines):
     # starting sub-threads
     pool = multiprocessing.Pool()
     for filename in sorted(glob.glob('*.fasta')):
-            pool.apply_async(parse_file, args=(filename, args.blast_cytokines))
+        pool.apply_async(parse_file, args=(filename, args.blast_cytokines))
 
     # clean up
     pool.close()

From 29d5c6d0deccc31c92ad3902f04659f48a8db90a Mon Sep 17 00:00:00 2001
From: Emma Bishop <bishemma1@gmail.com>
Date: Wed, 17 Aug 2022 17:12:16 -0700
Subject: [PATCH 5/5] Fix StringIO import for Python3 and buffering

Received an error when buffering was set to 0 (off): Python 3 only allows
unbuffered I/O in binary mode.
---
 02_generate_IMGT_input_cytokine_output.py | 4 ++--
 CdrExtraction.py                          | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/02_generate_IMGT_input_cytokine_output.py b/02_generate_IMGT_input_cytokine_output.py
index 9d1cbdb..e72a7bb 100644
--- a/02_generate_IMGT_input_cytokine_output.py
+++ b/02_generate_IMGT_input_cytokine_output.py
@@ -43,8 +43,8 @@ def parse_file(filename, blast_cytokines):
     args = parser.parse_args()
 
     # files to be written
-    out_imgt = open(args.imgt_input, 'w',0)
-    out_cytokine = open(args.cytokine_output, 'w',0)
+    out_imgt = open(args.imgt_input, 'w')
+    out_cytokine = open(args.cytokine_output, 'w')
     out_cytokine.write ('Well\t' + '\t'.join(CdrExtractionOptions.CYTOKINE_LIST.keys()) + '\n')
 
     # starting sub-threads
diff --git a/CdrExtraction.py b/CdrExtraction.py
index 9f9c619..6ebb818 100644
--- a/CdrExtraction.py
+++ b/CdrExtraction.py
@@ -13,7 +13,7 @@
 from Bio.Blast.Applications import NcbiblastnCommandline
 from Bio.Blast import NCBIXML
 from Bio.Seq import Seq
-from cStringIO import StringIO
+from io import StringIO
 
 # extract cytokine from read
 def CytokineExtraction(SEQ, DB):