@@ -1333,8 +1333,8 @@ def topHat_map(self, input_fastq, output_dir, genome, transcriptome, cpus):
13331333 return cmd
13341334
13351335 def picard_mark_duplicates (self , input_bam , output_bam , metrics_file , temp_dir = "." ):
1336- transient_file = re .sub ("\.bam$" , "" , output_bam ) + ".dups.nosort.bam"
1337- output_bam = re .sub ("\.bam$" , "" , output_bam )
1336+ transient_file = re .sub (r"\.bam$" , "" , output_bam ) + ".dups.nosort.bam"
1337+ output_bam = re .sub (r"\.bam$" , "" , output_bam )
13381338 cmd1 = self .tools .java + " -Xmx" + self .pm .javamem
13391339 cmd1 += " -jar `which MarkDuplicates.jar`"
13401340 cmd1 += " INPUT={0}" .format (input_bam )
@@ -1374,7 +1374,7 @@ def filter_reads(
13741374 Remove duplicates, filter for >Q, remove multiple mapping reads.
13751375 For paired-end reads, keep only proper pairs.
13761376 """
1377- nodups = re .sub ("\.bam$" , "" , output_bam ) + ".nodups.nofilter.bam"
1377+ nodups = re .sub (r"\.bam$" , "" , output_bam ) + ".nodups.nofilter.bam"
13781378 cmd1 = (
13791379 self .tools .sambamba
13801380 + " markdup -t {0} -r --compression-level=0 {1} {2} 2> {3}" .format (
@@ -1406,7 +1406,7 @@ def shift_reads(self, input_bam, genome, output_bam):
14061406 return cmd
14071407
14081408 def sort_index_bam (self , input_bam , output_bam ):
1409- tmp_bam = re .sub ("\.bam" , ".sorted" , input_bam )
1409+ tmp_bam = re .sub (r"\.bam" , ".sorted" , input_bam )
14101410 cmd1 = self .tools .samtools + " sort {0} {1}" .format (input_bam , tmp_bam )
14111411 cmd2 = "mv {0}.bam {1}" .format (tmp_bam , output_bam )
14121412 cmd3 = self .tools .samtools + " index {0}" .format (output_bam )
@@ -1638,7 +1638,7 @@ def bam_to_bigwig(
16381638 # addjust fragment length dependent on read size and real fragment size
16391639 # (right now it asssumes 50bp reads with 180bp fragments)
16401640 cmds = list ()
1641- transient_file = os .path .abspath (re .sub ("\.bigWig" , "" , output_bigwig ))
1641+ transient_file = os .path .abspath (re .sub (r"\.bigWig" , "" , output_bigwig ))
16421642 cmd1 = self .tools .bedtools + " bamtobed -i {0} |" .format (input_bam )
16431643 if not tagmented :
16441644 cmd1 += (
@@ -2050,39 +2050,41 @@ def parse_bowtie_stats(self, stats_file):
20502050 line = [
20512051 i for i in range (len (content )) if " reads; of these:" in content [i ]
20522052 ][0 ]
2053- stats ["readCount" ] = re .sub ("\D.*" , "" , content [line ])
2053+ stats ["readCount" ] = re .sub (r"\D.*" , "" , content [line ])
20542054 if 7 > len (content ) > 2 :
20552055 line = [
20562056 i
20572057 for i in range (len (content ))
20582058 if "were unpaired; of these:" in content [i ]
20592059 ][0 ]
2060- stats ["unpaired" ] = re .sub ("\D" , "" , re .sub ("\(.*" , "" , content [line ]))
2060+ stats ["unpaired" ] = re .sub (
2061+ r"\D" , "" , re .sub (r"\(.*" , "" , content [line ])
2062+ )
20612063 else :
20622064 line = [
20632065 i
20642066 for i in range (len (content ))
20652067 if "were paired; of these:" in content [i ]
20662068 ][0 ]
20672069 stats ["unpaired" ] = stats ["readCount" ] - int (
2068- re .sub ("\D" , "" , re .sub ("\(.*" , "" , content [line ]))
2070+ re .sub (r"\D" , "" , re .sub (r"\(.*" , "" , content [line ]))
20692071 )
20702072 line = [i for i in range (len (content )) if "aligned 0 times" in content [i ]][
20712073 0
20722074 ]
2073- stats ["unaligned" ] = re .sub ("\D" , "" , re .sub ("\(.*" , "" , content [line ]))
2075+ stats ["unaligned" ] = re .sub (r"\D" , "" , re .sub (r"\(.*" , "" , content [line ]))
20742076 line = [
20752077 i for i in range (len (content )) if "aligned exactly 1 time" in content [i ]
20762078 ][0 ]
2077- stats ["unique" ] = re .sub ("\D" , "" , re .sub ("\(.*" , "" , content [line ]))
2079+ stats ["unique" ] = re .sub (r"\D" , "" , re .sub (r"\(.*" , "" , content [line ]))
20782080 line = [i for i in range (len (content )) if "aligned >1 times" in content [i ]][
20792081 0
20802082 ]
2081- stats ["multiple" ] = re .sub ("\D" , "" , re .sub ("\(.*" , "" , content [line ]))
2083+ stats ["multiple" ] = re .sub (r"\D" , "" , re .sub (r"\(.*" , "" , content [line ]))
20822084 line = [
20832085 i for i in range (len (content )) if "overall alignment rate" in content [i ]
20842086 ][0 ]
2085- stats ["alignmentRate" ] = re .sub ("\%.*" , "" , content [line ]).strip ()
2087+ stats ["alignmentRate" ] = re .sub (r"\%.*" , "" , content [line ]).strip ()
20862088 except IndexError :
20872089 pass
20882090 return stats
@@ -2107,22 +2109,24 @@ def parse_duplicate_stats(self, stats_file):
21072109 for i in range (len (content ))
21082110 if "single ends (among them " in content [i ]
21092111 ][0 ]
2110- series ["single-ends" ] = re .sub ("\D" , "" , re .sub ("\(.*" , "" , content [line ]))
2112+ series ["single-ends" ] = re .sub (
2113+ r"\D" , "" , re .sub (r"\(.*" , "" , content [line ])
2114+ )
21112115 line = [
21122116 i
21132117 for i in range (len (content ))
21142118 if " end pairs... done in " in content [i ]
21152119 ][0 ]
21162120 series ["paired-ends" ] = re .sub (
2117- "\D" , "" , re .sub ("\.\.\..*" , "" , content [line ])
2121+ r"\D" , "" , re .sub (r"\.\.\..*" , "" , content [line ])
21182122 )
21192123 line = [
21202124 i
21212125 for i in range (len (content ))
21222126 if " duplicates, sorting the list... done in " in content [i ]
21232127 ][0 ]
21242128 series ["duplicates" ] = re .sub (
2125- "\D" , "" , re .sub ("\.\.\..*" , "" , content [line ])
2129+ r"\D" , "" , re .sub (r"\.\.\..*" , "" , content [line ])
21262130 )
21272131 except IndexError :
21282132 pass
@@ -2158,7 +2162,7 @@ def get_peak_number(self, sample):
21582162 """
21592163 proc = subprocess .Popen (["wc" , "-l" , sample .peaks ], stdout = subprocess .PIPE )
21602164 out , err = proc .communicate ()
2161- sample ["peakNumber" ] = re .sub ("\D.*" , "" , out )
2165+ sample ["peakNumber" ] = re .sub (r"\D.*" , "" , out )
21622166 return sample
21632167
21642168 def get_frip (self , sample ):
@@ -2171,6 +2175,6 @@ def get_frip(self, sample):
21712175
21722176 with open (sample .frip , "r" ) as handle :
21732177 content = handle .readlines ()
2174- reads_in_peaks = int (re .sub ("\D" , "" , content [0 ]))
2178+ reads_in_peaks = int (re .sub (r"\D" , "" , content [0 ]))
21752179 mapped_reads = sample ["readCount" ] - sample ["unaligned" ]
21762180 return pd .Series (reads_in_peaks / mapped_reads , index = "FRiP" )
0 commit comments