Skip to content

Commit a5eea44

Browse files
committed
Added option to skip jackknife error estimation
1 parent b2dca9e commit a5eea44

File tree

2 files changed

+22
-15
lines changed

2 files changed

+22
-15
lines changed

RASCalculator.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
parser.add_argument("-C", "--NrChroms", type=int, metavar="<INT>", default=22, required=False, help="The number of chromosomes in the dataset. [22]")
2828
parser.add_argument("-d", "--details", action='store_true', help="Print RAS calculations for each allele frequency, in addition to the total.")
2929
parser.add_argument("--f3FreqCutoff", type=float, metavar="<FREQ>", default=0.05, help="minimum minor allele frequency for f3 values. Defaults to 0.05. This is recommended to skip rare mutations and reduce the sensitivity of the F3 statistics on sequencing errors and DNA damage.")
30+
parser.add_argument("--skipJackknife", action='store_true', default=False, help="When provided, the jackknife error is not estimated, but instead set to 0. [False]")
3031
args = parser.parse_args()
3132

3233
if args.minAF<1:
@@ -42,6 +43,8 @@
4243
NumBins=args.NrChroms
4344
minAF=args.minAF
4445
maxAF=args.maxAF
46+
skipJK=args.skipJackknife
47+
4548
# RightIndex={} #Holds the INDICES of the Right pops
4649
# LeftsIndex={} #Holds the INDICES of the Left pops
4750

@@ -160,7 +163,7 @@ def getTotalDerivedAC(afDict):
160163
for x in range(len(LeftPops)):
161164
for j in range(len(TestPops)):
162165
for i in range(minAF-1,maxAF+2):
163-
thetaJ,sigma2 = ras.getJackknife(RAS[x][j][i], mj[x][j], blockSizes)
166+
thetaJ,sigma2 = ras.getJackknife(RAS[x][j][i], mj[x][j], blockSizes, skipJK)
164167
ThetaJ[x][j][i] = thetaJ
165168
Sigma2[x][j][i] = sigma2
166169

RASUtils.py

+18-14
Original file line numberDiff line numberDiff line change
@@ -40,31 +40,35 @@ def __readHeader(self):
4040
self.sizes[popName] = popSize
4141
print("#Available populations in Input File and their respective sizes: ", self.sizes, file=self.output)
4242

43-
def getJackknife(blockValues, totalObservations, blockSizes):
43+
def getJackknife(blockValues, totalObservations, blockSizes, skipJackknife):
4444
thetaminus=[0 for x in range(len(blockSizes))]
4545
sum1=0
4646
sum2=0
4747
jackknifeStdErr=0
4848
if sum(totalObservations)==0:
49+
## If no observations were made, the rare allele sharing rate is 0.
4950
thetahat=0
5051
else:
52+
## thetahat is normalised by number of observations
5153
thetahat=sum(blockValues)/sum(totalObservations)
52-
53-
## Normalise blockValues.
54-
normalisedValues = [0.0 for c in range(len(blockSizes))]
54+
5555
for c in range(len(blockSizes)):
56-
if totalObservations[c] == 0:
57-
continue
56+
if totalObservations[c] == sum(totalObservations):
57+
## If all rare alleles are on a single chunk, the ras/site without that chunk is 0.
58+
thetaminus[c]=0
5859
else:
59-
normalisedValues[c] = blockValues[c]/totalObservations[c]
60-
61-
for c in range(len(blockSizes)):
62-
thetaminus[c]=( (sum(blockValues)-blockValues[c]) / (sum(totalObservations)-totalObservations[c]) )
60+
## thetaminus is normalised by number of observations
61+
thetaminus[c]=( (sum(blockValues)-blockValues[c]) / (sum(totalObservations)-totalObservations[c]) )
62+
63+
## Jackknife estimator calculation
6364
sum1+=thetahat-thetaminus[c]
6465
sum2+=(blockSizes[c]*thetaminus[c])/sum(blockSizes)
6566
jackknifeEstimator=sum1+sum2
66-
for c in range(len(blockSizes)):
67-
hj=sum(blockSizes)/blockSizes[c]
68-
pseudoval = (hj*thetahat)-((hj-1)*thetaminus[c])
69-
jackknifeStdErr+=(1/len(blockSizes))*(((pseudoval-jackknifeEstimator)**2)/(hj-1))
67+
68+
## Standard error calculation
69+
if not skipJackknife:
70+
for c in range(len(blockSizes)):
71+
hj=sum(blockSizes)/blockSizes[c]
72+
pseudoval = (hj*thetahat)-((hj-1)*thetaminus[c])
73+
jackknifeStdErr+=(1/len(blockSizes))*(((pseudoval-jackknifeEstimator)**2)/(hj-1))
7074
return (jackknifeEstimator,jackknifeStdErr)

0 commit comments

Comments
 (0)