-
Notifications
You must be signed in to change notification settings - Fork 1
/
klibrate.py
executable file
·854 lines (677 loc) · 29.1 KB
/
klibrate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
from __future__ import division
import pdb
import sys
import getopt
import stats
import os
import gc
import random
import glob # for the LogMasher
import re # for the LogMasher
from datetime import datetime # for Cardenio
from numpy import *
from pylab import *
from scipy.optimize import leastsq
import matplotlib.pyplot as plt
from time import strftime
from datetime import datetime # for the timer in random seed (SanXoT)
import argparse # to load arguments easily
# libraries for SanXoTViewer
### PANDAS BLOCKED
# import pandas as pd # to load and merge tables
### WX BLOCKED
# import wx # to create the GUI
import re # for regular expressions
# import matplotlib.cbook as cbook # to check an iterable (remove if possible)
# next lines are important
# they are to include the library to the library.zip file
# which is shared with other programs of the same package
# DO NOT REMOVE
from xml.etree import ElementTree
from scipy.stats import norm
import sanxot
import aljamia
import subprocess
import pprint
import math
import sqlite3 # for Mojón
from optparse import OptionParser
#######################################################
def easterEgg():
print u"""
... pero dígame vuestra merced, señor bachiller: ¿qué hazañas mías son las que
más se ponderan en esa historia?
Don Quixote, Part Two, Chapter III."""
#------------------------------------------------------
def calibrate(inputRawData = None,
inputRelations = None,
rawDataFile = "",
relationsFile = "",
kSeed = 1,
varianceSeed = 0.001,
medianSide = 100,
maxIterations = 0,
verbose = False,
showGraph = False,
showSumSQ = False,
forceParameters = False,
alphaSeed = 1.0,
showRank = True,
useCooperativity = False,
graphFileVRank = "",
graphFileVValue = "",
graphDataFile = "",
graphDPI = None):
extraLog = []
extraLog.append([])
if verbose:
print
print "loading input raw data file"
extraLog.append(["loading input raw data file"])
if inputRawData == None:
if len(rawDataFile) == 0:
print "Error: no input raw data"
extraLog.append(["Error: no input raw data"])
else:
inputRawData = stats.loadInputDataFile(rawDataFile)
if verbose:
print "loading relations file"
extraLog.append(["loading relations file"])
if inputRelations == None:
if len(relationsFile) == 0:
if not forceParameters:
print "Error: no relations file"
extraLog.append(["Error: no relations file"])
return None, None, None, None, extraLog
else:
inputRelations = stats.loadRelationsFile(relationsFile)
#### calculate k and variance ####
alpha = 1.0
if not forceParameters:
if verbose:
print "calculating K and variance"
extraLog.append(["calculating K and variance"])
# *** just to see it
result = getKandVariance(inputRawData, inputRelations, kSeed = kSeed, varianceSeed = varianceSeed, maxIterations = maxIterations, verbose = True, showSumSQ = True, medianSide = medianSide, alphaSeed = alphaSeed, useCooperativity = useCooperativity)
k = result[0]
variance = result[1]
if useCooperativity: alpha = result[2]
else:
k = kSeed
variance = varianceSeed
alpha = alphaSeed
# save VRank graph
showGraphTool(inputRawData, inputRelations, k, variance, alpha, medianSide, showRank = True, graphFile = graphFileVRank, graphData = graphDataFile, dpi = graphDPI, showGraph = showGraph)
# save VValue graph
showGraphTool(inputRawData, inputRelations, k, variance, alpha, medianSide, showRank = False, graphFile = graphFileVValue, dpi = graphDPI, showGraph = showGraph)
# get calibrated idXV
idXV = idXVcal(inputRawData, k, alpha)
return idXV, variance, k, alpha, extraLog
#------------------------------------------------------
# moved from stats
def getMADDistribution(nextIdX,
mergedData,
k,
variance,
alpha,
medianSide = 100,
showGraph = False,
verbose = False):
MADconstant = 1.48260221850560 # *** 1 / DISTR.NORM.ESTAND.INV(3 / 4) get exact number
MADDistribution = []
distrWeight = []
# inputSequences = extractColumns(input, 0)
# outputSequences = extractColumns(output, 0)
newlist = []
for orow in nextIdX:
sequence = orow[0]
# it is important to avoid sorting to keep it fast
# so in next line do not foget sort = False
# this should arrive here already sorted
scanListWithSequence = stats.filterByElement(mergedData, sequence, sort = False)
if len(scanListWithSequence) > 1: # otherwise Xi = Xj --> Xi - Xj = 0 --> does not work
for scanRow in scanListWithSequence:
newrow = []
weight = scanRow[3] # the V
degreesOfFreedom = len(scanListWithSequence)
XiXj = scanRow[2] - orow[1]
newrow.append(sequence) # sequence = 0
newrow.append(scanRow[1]) # scan number = 1
newrow.append(XiXj) # Xi - Xj = 2
newrow.append(weight) # weight = 3
newrow.append(len(scanListWithSequence)) # degrees of freedom = 4
newrow.append(fabs(XiXj) * sqrt(float(degreesOfFreedom) / (float(degreesOfFreedom - 1)))) # = 5
newrow.append(0) # space to save the median = 6
newrow.append(0) # space to save the MAD formula = 7
newlist.append(newrow)
newlist = stats.sortByIndex(newlist, 3) # sort by weight
# get median + rank
nextlist = []
counter = 0
if len(newlist) < medianSide * 2:
if verbose:
print 'Not enough data to perform statistics,'
print 'len(newlist) = %s, while medianSide = %s' % (str(len(newlist)), str(medianSide))
sys.exit()
for i in range(len(newlist))[medianSide:len(newlist) - medianSide]:
window = newlist[i - medianSide:i + medianSide + 1]
median = stats.medianByIndex(window, 5)
newlist[i][6] = median
# fill the borders
for i in range(len(newlist))[:medianSide]:
newlist[i][6] = newlist[medianSide + 1][6]
for i in range(len(newlist))[len(newlist) - medianSide:]:
newlist[i][6] = newlist[len(newlist) - medianSide - 1][6]
# fill MAD formula
for i in range(len(newlist)):
newlist[i][7] = 1 / (MADconstant * newlist[i][6]) ** 2
MADDistribution.append(newlist[i][7])
distrWeight.append(newlist[i][3])
if verbose:
print 'k = %f, var = %f' % (k, variance)
return MADDistribution, distrWeight
#------------------------------------------------------
# moved from stats
def getInverseOfFit(mergedData, k, variance, alpha):
inverseOfFit = []
#input = stats.sortByIndex(input, 2)
for element in mergedData:
sequence = element[0]
# sort = False for speeding
scanListWithSequence = stats.filterByElement(mergedData, sequence, sort = False)
if len(scanListWithSequence) > 1:
weight = element[3]
inverseOfFit.append(getW_klibrate(weight, k, variance, alpha))
inverseOfFit = sort(inverseOfFit) # sort by weight
return inverseOfFit
#------------------------------------------------------
# copied to klibrate
def residuals(params, inputRawData, inputRelations, medianSide, verbose = False, showSumSQ = False, useCooperativity = False):
k = params[0]
variance = params[1]
alpha = 1.0
if useCooperativity: alpha = params[2]
inputRawData.sort()
inputRelations.sort()
windowWidth = medianSide * 2 + 1
if len(inputRawData) < windowWidth:
print 'Error: window for median is bigger than total input size'
sys.exit()
# output = makeStats(k, variance, input = input)
nextIdXData = getNextIdX_klibrate(inputRawData, inputRelations, k, variance, alpha, giveMergedData = True)
nextIdX = nextIdXData[0]
mergedData = nextIdXData[1]
experimental, weights = asarray(getMADDistribution(nextIdX, mergedData, k, variance, alpha, medianSide, verbose = False))
theoretical = asarray(getInverseOfFit(mergedData, k, variance, alpha))
experArray = asarray(experimental[medianSide:len(experimental) - medianSide + 1])
theorArray = asarray(theoretical[medianSide:len(experimental) - medianSide + 1])
if len(experArray) != len(theorArray):
print 'Error: experimental and theoretical array do not match'
sys.exit()
if verbose:
if showSumSQ and len(experArray) == len(theorArray):
totSumSQ = 0
for i in range(len(experArray)):
totSumSQ += (experArray[i] - theorArray[i]) ** 2
if useCooperativity:
print 'k = %g, var = %g, alpha = %g, sumSQ = %e' % (k, variance, alpha, totSumSQ)
else:
print 'k = %g, var = %g, sumSQ = %e' % (k, variance, totSumSQ)
else:
if useCooperativity:
print 'k = ' + str(k) + ', var = ' + str(variance) + ', alpha = ' + str(alpha)
else:
print 'k = ' + str(k) + ', var = ' + str(variance)
diff = experArray - theorArray
return diff
#------------------------------------------------------
def getNextIdX_klibrate(idXVall, relations, k = 1.0, variance = 0.0, alpha = 1.0, giveMergedData = False):
idX = []
# no need to sort: mergeInput will do it anyway
# idXVall.sort()
# relations.sort()
idXVWall = addW_klibrate(idXVall, k, variance, alpha)
mergedData = stats.mergeInput(idXVWall, relations)
position = 0
id2old = ""
XWlist = []
if len(mergedData) == 0:
print "Error, merged data list is empty. Please check the provided files do exist and are not corrupt."
sys.exit()
while position < len(mergedData):
id2 = mergedData[position][0]
if id2 != id2old and len(XWlist) > 0:
x2 = stats.getNextX(XWlist)
idX.append([id2old, x2])
XWlist = []
else:
XWspecific = [mergedData[position][2], mergedData[position][4]]
XWlist.append(XWspecific)
position += 1
id2old = id2
# the last one has not been added, so...
x2 = stats.getNextX(XWlist)
idX.append([id2old, x2])
if giveMergedData:
return [idX, mergedData]
else:
return idX
#------------------------------------------------------
def getW_klibrate(v, k = 1.0, variance = 0.0, alpha = 1.0):
w = 1 / (((k ** alpha) / (v ** alpha)) + variance)
return w
#------------------------------------------------------
def addW_klibrate(idXVlist, k = 1.0, variance = 0.0, alpha = 1.0):
idXVWlist = []
for row in idXVlist:
id = row[0]
x = row[1]
v = row[2]
w = getW_klibrate(v, k, variance, alpha)
idXVWlist.append([id, x, v, w])
return idXVWlist
#------------------------------------------------------
# copied to klibrate
def getKandVariance(inputRawData,
inputRelations,
maxIterations = 0,
kSeed = 1.0,
varianceSeed = 0.001,
medianSide = 100,
verbose = False,
showSumSQ = False,
alphaSeed = 1.0,
useCooperativity = False):
if useCooperativity:
seed = [kSeed, varianceSeed, alphaSeed]
bestSol, success = leastsq(residuals, seed, args = (inputRawData, inputRelations, medianSide, verbose, showSumSQ, useCooperativity), maxfev = maxIterations)
else:
seed = [kSeed, varianceSeed]
bestSol, success = leastsq(residuals, seed, args = (inputRawData, inputRelations, medianSide, verbose, showSumSQ, useCooperativity), maxfev = maxIterations)
kOut = bestSol[0]
varianceOut = bestSol[1]
if useCooperativity: alphaOut = bestSol[2]
# kOut = k
# varianceOut = variance
if verbose:
print
print "K = " + str(kOut)
print "Variance = " + str(varianceOut)
if useCooperativity: print "Alpha = " + str(alphaOut)
if useCooperativity: return [kOut, varianceOut, alphaOut]
else: return [kOut, varianceOut]
#------------------------------------------------------
def showGraphTool(inputRawData,
inputRelations,
k,
variance,
alpha,
medianSide,
verbose = False,
showRank = False,
graphFile = None,
graphData = None,
dpi = None,
showGraph = True):
plt.clf()
inputRawData.sort()
inputRelations.sort()
windowWidth = medianSide * 2 + 1
if len(inputRawData) < windowWidth:
print 'Error: window for median is bigger than total input size'
sys.exit()
# output = makeStats(k, variance, input = input)
nextIdXData = getNextIdX_klibrate(inputRawData, inputRelations, k, variance, alpha, giveMergedData = True)
nextIdX = nextIdXData[0]
mergedData = nextIdXData[1]
MADdistrOut, weights = getMADDistribution(nextIdX, mergedData, k, variance, alpha, medianSide)
invOfFitOut = getInverseOfFit(mergedData, k, variance, alpha)
MADdistrOut = MADdistrOut[medianSide:len(MADdistrOut) - medianSide + 1]
invOfFitOut = invOfFitOut[medianSide:len(invOfFitOut) - medianSide + 1]
weights = weights[medianSide:len(weights) - medianSide + 1]
# folderToSave = "D:\\DATUMARO\\trabajo\\programas_repositorio\\BioSistemas SanXoT\\MicroArrays\\intento5 CvsCs usando var robusta por partes\\"
# stats.saveFile(folderToSave + "MADdistrOut.txt", MADdistrOut)
# stats.saveFile(folderToSave + "invOfFitOut.txt", invOfFitOut)
# stats.saveFile(folderToSave + "weights.txt", weights)
if showRank:
plt.plot(range(len(MADdistrOut)), MADdistrOut, 'g.', range(len(invOfFitOut)), invOfFitOut, 'r', linewidth=1.0, markersize=2.0, markeredgewidth=0.0)
plt.xlabel('rank($V_s$)')
plt.ylabel('1 / MSD')
# to save data
# *** use a better filename
dataToSave = []
for i in xrange(len(MADdistrOut)):
dataToSave.append([i, weights[i], MADdistrOut[i], invOfFitOut[i]])
if graphData:
stats.saveFile(graphData, dataToSave, "rank(Vs)\tweight\tMAD\t1/fit")
else:
# uncomment to graph MSD instead of 1 / MSD
#
# for i in xrange(len(invOfFitOut)):
# invOfFitOut[i] = 1 / invOfFitOut[i]
# for i in xrange(len(MADdistrOut)):
# MADdistrOut[i] = 1 / MADdistrOut[i]
plt.plot(weights, MADdistrOut, 'g.', weights, invOfFitOut, 'r', linewidth=1.0, markersize=2.0, markeredgewidth=0.0)
plt.xlabel('($V_s$)')
plt.ylabel('1 / MSD')
plt.grid(True)
plt.title('k = %g, $\sigma^2$ = %g, alpha = %g' % (k, variance, alpha))
if graphFile:
plt.savefig(graphFile, dpi = dpi)
if showGraph:
plt.show()
#------------------------------------------------------
def extractKFromVarFile(kFile, verbose = True, defaultSeed = 1.0):
resultValue, varianceOk = stats.extractVariableFromInfoFile(varFile, varName = "K", defaultSeed = defaultSeed, verbose = verbose)
return resultValue, varianceOk
#------------------------------------------------------
def idXVcal(idXVlist, k = 1.0, alpha = 1.0):
idXVcal = []
for row in idXVlist:
id = row[0]
x = row[1]
v = row[2]
vcal = getW_klibrate(v, k = k, alpha = alpha)
idXVcal.append([id, x, vcal])
return idXVcal
#------------------------------------------------------
def printHelp(version):
# add alpha constant
print """
Klibrate %s is a program made in the Jesus Vazquez Cardiovascular Proteomics
Lab at Centro Nacional de Investigaciones Cardiovasculares, used to perform the
calibration of experimental data, as a first step to integrate these data into
higher levels along with the SanXoT program.
To perform the calibration two parameters have to be calculated: the k (weight
constant), and the variance. They are calculated iteratively using the
Levenberg-Marquardt algorithm, starting from the seeds the user introduces
(it is possible to perform the calculation without the iterative calculation by
forcing both parameters with the -f option). In the integration that follows
the variance can be recalculated.
Klibrate needs two input files:
* the original data file, containing unique identifiers of each scan, such
as "RawFile05.raw-scan19289-charge2" or "File05B_scannumber12877_z3", the
Xi which corresponds to the log2(A/B), and the Vi which corresponds to the
weight of the measure).
* the relations file, containing a first column with the higher level
identifiers (such as the peptide sequence, for example "CGLAGCGLLK", or
the protein, if you wish to directly integrate scans into proteins, such
as the Uniprot Accession Numbers "P01308" or KEGG Gene ID "hsa:3630"), and
the lower level identifiers within the abovementioned original data file
(such as "RawFile05.raw-scan19289-charge2").
And delivers the output calibrated file:
* the calibrated data file, containing the same information as the
original data file, but changing the values of the third column
(containing the weights) to adapt the information to the calibrated
weights that can be used as input in the SanXoT program.
Usage: klibrate.py [OPTIONS] -r[relations file] -d[original data file] -o[calibrated output file]
-h, --help Display this help and exit.
-a, --analysis=string
Use a prefix for the output files. If this is not
provided, then the prefix will be garnered from the data
file.
-b, --no-verbose Do not print result summary after executing.
-d, --datafile Input data file with text identificators in the first
column, measured values (x) in the second column, and
uncalibrated weights (v) in the third column.
-D, --outgraphdata=filename
To use a non-default name for the data used to create
calibration graph files.
-f, --forceparameters
Use the parameters (k and variance) as provided, without
using the Levenberg-Marquardt algorithm.
-g, --no-showgraph Do not show the rank(V) vs 1 / MSD graph after
the calculation.
-G, --outgraphvvalue=filename
To use a non-default name for the graph file which shows
the value of V (the weight) versus 1 / MSD.
-k, --kseed Seed for the weight constant. Default is k = 1.
-K, --kfile=filename
Get the K value from a text file. It must contain a line
(not more than once) with the text "K = [float]". This
suits the info file from another integration (see -L).
-L, --infofile=filename
To use a non-default name for the info file.
-m, --maxiterations Maximum number of iterations performed by the Levenberg-
Marquardt algorithm to calculate the variance and the k
constant. If unused, the default value of the algorithm
is taken.
-o, --outputfile To use a non-default output calibrated file name (see
above for more information on this file).
-p, --place, --folder=foldername
To use a different common folder for the output files.
If this is not provided, the the folder used will be the
same as the input folder.
-r, --relfile, --relationsfile
Relations file, with identificators of the higher level
in the first column, and identificators of the lower
level in the second column.
-R, --outgraphvrank=filename
To use a non-default name for the graph file which shows
the rank of V (the weight) versus 1 / MSD.
-s, --no-showsteps Do not print result summary and steps of each Levenberg-
Marquardt iteration.
-v, --var, --varianceseed
Seed used to start calculating the variance.
Default is 0.001.
-V, --varfile=filename
Get the variance value from a text file. It must contain
a line (not more than once) with the text
"Variance = [double]". This suits the info file from
another integration (see -L).
-w, --window The amount of weight-ordered lower level elements
(scans, usually) that are taken at a time to calculate
the median of the weight, which is compared to the fit;
default is 200.
examples:
* To calculate the variance and k starting with a seed v = 0.03 and k = 40, printing the steps of the Levenberg-Marquardt algorithm and results, showing the rank(Vs) vs 1 / MSD graph afterwards:
klibrate.py -gbs -v0.03 -k40 -dC:\\temp\\originalDataFile.txt -rC:\\temp\\relationsFile.txt -oC:\\temp\\calibratedWeights.tsv
* To get fast results of an integration forcing a variance = 0.02922 and a k = 35.28:
klibrate.py -f -v0.02922 -k35.28 -dC:\\temp\\originalDataFile.txt -rC:\\temp\\relationsFile.txt -oC:\\temp\\calibratedWeights.tsv
* To see the graph resulting from a calculation with variance = 0.02922 and a k = 35.28:
klibrate.py -gf -v0.02922 -k35.28 -dC:\\temp\\originalDataFile.txt -rC:\\temp\\relationsFile.txt -oC:\\temp\\calibratedWeights.tsv
""" % version
return
#------------------------------------------------------
def main(argv):
version = "v1.18"
verbose = True
showGraph = True
graphDPI = 100 # default of Matplotlib's savefig method
showSteps = True
forceParameters = False
kSeed = 1.0
varianceSeed = 0.001
alphaSeed = 1.0
useCooperativity = False
medianSide = 100
maxIterations = 0
dataFile = ""
relationsFile = ""
outputCalibrated = ""
infoFile = ""
kFile = ""
kSeedProvided = False
varFile = ""
varianceSeedProvided = False
graphFileVRank = ""
graphFileVValue = ""
graphDataFile = ""
showRank = False
analysisName = ""
defaultAnalysisName = "klibrate"
analysisFolder = ""
logList = [["Klibrate " + version], ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
defaultDataFile = "data"
defaultRelationsFile = "rels"
defaultOutputInfo = "infoFile"
defaultOutputGraphVRank = "outGraph_VRank"
defaultOutputGraphVValue = "outGraph_VValue"
defaultGraphDataFile = "outGraph_Data"
defaultOutputCalibrated = "calibrated"
defaultTableExtension = ".tsv"
defaultTextExtension = ".txt"
defaultGraphExtension = ".png"
try:
opts, args = getopt.getopt(argv, "a:p:k:v:c:d:r:o:w:m:L:G:D:R:K:V:bgsfh", ["analysis=", "folder=", "kseed=", "varianceseed=", "alphaseed=", "datafile=", "relfile=", "outputfile=", "window=", "maxiterations=", "infofile=", "outgraphvrank=", "outgraphvvalue=", "outgraphdata=", "kfile=", "varfile=", "no-verbose", "no-showgraph", "no-showsteps", "forceparameters", "showrank", "help", "egg", "easteregg"])
except getopt.GetoptError:
message = "Error while getting parameters."
print message
logList.append([message])
# stats.saveFile(infoFile, logList, "INFO FILE")
sys.exit(2)
if len(opts) == 0:
printHelp(version)
sys.exit()
for opt, arg in opts:
if opt in ("-a", "--analysis"):
analysisName = arg
if opt in ("-p", "--place", "--folder"):
analysisFolder = arg
if opt in ("-k", "--kseed"):
kSeed = float(arg)
kSeedProvided = True
elif opt in ("-v", "--var", "--varianceseed"):
varianceSeed = float(arg)
varianceSeedProvided = True
elif opt in ("-c", "--alphaseed"):
useCooperativity = True
alphaSeed = float(arg)
elif opt in ("-d", "--datafile"):
dataFile = arg
elif opt in ("-r", "--relfile", "--relationsfile"):
relationsFile = arg
elif opt in ("-o", "--outputfile"):
outputCalibrated = arg
elif opt in ("-w", "--window"):
windowWidth = round(float(arg))
if windowWidth % 2 == 0:
windowWidth += 1
medianSide = int((windowWidth - 1) / 2)
elif opt in ("-b", "--no-verbose"):
verbose = False
elif opt in ("-g", "--no-showgraph"):
showGraph = False
elif opt in ("-s", "--no-showsteps"):
showSteps = False
elif opt in ("-m", "--maxiterations"):
maxIterations = int(arg)
elif opt in ("-L", "--infofile"):
infoFile = arg
elif opt in ("-G", "--outgraphvvalue"):
graphFileVValue = arg
elif opt in ("-D", "--outgraphdata"):
graphDataFile = arg
elif opt in ("-K", "--kfile"):
kFile = arg
elif opt in ("-V", "--varfile"):
varFile = arg
elif opt in ("-f", "--forceparameters"):
forceParameters = True
elif opt in ("-R", "--outgraphvrank"):
graphFileVRank = arg
elif opt in ("-h", "--help"):
printHelp(version)
sys.exit()
elif opt in ("--egg", "--easteregg"):
easterEgg()
sys.exit()
verbose = verbose or showSteps
# REGION: FILE NAMES SETUP
if len(analysisName) == 0:
if len(dataFile) > 0:
analysisName = os.path.splitext(os.path.basename(dataFile))[0]
else:
analysisName = defaultAnalysisName
if len(os.path.dirname(analysisName)) > 0:
analysisNameFirstPart = os.path.dirname(analysisName)
analysisName = os.path.basename(analysisName)
if len(analysisFolder) == 0:
analysisFolder = analysisNameFirstPart
if len(dataFile) > 0 and len(analysisFolder) == 0:
if len(os.path.dirname(dataFile)) > 0:
analysisFolder = os.path.dirname(dataFile)
# input
if len(dataFile) == 0:
dataFile = os.path.join(analysisFolder, analysisName + "_" + defaultDataFile + defaultTableExtension)
if len(os.path.dirname(dataFile)) == 0 and len(analysisFolder) > 0:
dataFile = os.path.join(analysisFolder, dataFile)
if len(relationsFile) == 0:
relationsFile = os.path.join(analysisFolder, analysisName + "_" + defaultRelationsFile + defaultTableExtension)
if len(os.path.dirname(relationsFile)) == 0:
relationsFile = os.path.join(analysisFolder, relationsFile)
if len(os.path.dirname(varFile)) == 0 and len(os.path.basename(varFile)) > 0:
varFile = os.path.join(analysisFolder, varFile)
if len(varFile) > 0 and not varianceSeedProvided:
varianceSeed, varianceOk = stats.extractVarianceFromVarFile(varFile, verbose = verbose, defaultSeed = varianceSeed)
if not varianceOk:
logList.append(["Variance not found in text file"])
stats.saveFile(infoFile, logList, "INFO FILE")
sys.exit()
if len(os.path.dirname(kFile)) == 0 and len(os.path.basename(kFile)) > 0:
kFile = os.path.join(analysisFolder, kFile)
if len(kFile) > 0 and not kSeedProvided:
kSeed, KOk = stats.extractKFromKFile(kFile, verbose = verbose, defaultSeed = kSeed)
if not KOk:
logList.append(["K not found in text file."])
stats.saveFile(infoFile, logList, "INFO FILE")
sys.exit()
# output
if len(outputCalibrated) == 0:
outputCalibrated = os.path.join(analysisFolder, analysisName + "_" + defaultOutputCalibrated + defaultTableExtension)
else:
if len(os.path.dirname(outputCalibrated)) == 0:
outputCalibrated = os.path.join(analysisFolder, outputCalibrated)
if len(graphFileVRank) == 0:
graphFileVRank = os.path.join(analysisFolder, analysisName + "_" + defaultOutputGraphVRank + defaultGraphExtension)
if len(graphFileVValue) == 0:
graphFileVValue = os.path.join(analysisFolder, analysisName + "_" + defaultOutputGraphVValue + defaultGraphExtension)
if len(graphDataFile) == 0:
graphDataFile = os.path.join(analysisFolder, analysisName + "_" + defaultGraphDataFile + defaultTableExtension)
if len(infoFile) == 0:
infoFile = os.path.join(analysisFolder, analysisName + "_" + defaultOutputInfo + defaultTextExtension)
logList.append(["Variance seed = " + str(varianceSeed)])
logList.append(["K seed = " + str(kSeed)])
if useCooperativity: logList.append(["Alpha seed = " + str(alphaSeed)])
logList.append(["Input data file: " + dataFile])
logList.append(["Input relations file: " + relationsFile])
logList.append(["Output calibrated file: " + outputCalibrated])
logList.append(["Output info file: " + infoFile])
logList.append(["Output V rank graph file: " + graphFileVRank])
logList.append(["Output V graph file: " + graphFileVValue])
logList.append(["Output data file for graph: " + graphDataFile])
logList.append(["Parameters forced: " + str(forceParameters)])
logList.append(["Max iterations: " + str(maxIterations)])
# END REGION: FILE NAMES SETUP
calibratedData, variance, k, alpha, extraLog = calibrate(rawDataFile = dataFile,
relationsFile = relationsFile, kSeed = kSeed, varianceSeed = varianceSeed,
medianSide = medianSide, maxIterations = maxIterations, verbose = showSteps,
showGraph = showGraph, forceParameters = forceParameters, alphaSeed = alphaSeed,
showRank = showRank, useCooperativity = useCooperativity, graphFileVRank = graphFileVRank, graphFileVValue = graphFileVValue, graphDataFile = graphDataFile, graphDPI = graphDPI)
logList.extend(extraLog)
if not calibratedData:
if len(infoFile) > 0:
stats.saveFile(infoFile, logList, "INFO FILE")
sys.exit()
if len(outputCalibrated) > 0:
stats.saveFile(outputCalibrated, calibratedData, "id\tX\tVcal")
logList.append([])
logList.append(["K = " + str(k)])
logList.append(["Variance = " + str(variance)])
if useCooperativity: logList.append(["Alpha = " + str(alpha)])
if len(infoFile) > 0:
stats.saveFile(infoFile, logList, "INFO FILE")
if verbose:
print
print "*** results ***"
print "k = " + str(k)
print "variance = " + str(variance)
if useCooperativity: print "alpha = " + str(alpha)
print
print "Output calibrated file in: " + outputCalibrated
print "Graph with rank of V in: " + graphFileVRank
print "Graph with value of V in: " + graphFileVValue
print "Info file in: " + infoFile
#######################################################
if __name__ == "__main__":
main(sys.argv[1:])