From 313ade5f2995106cc8b599f6e8619f59c90e7d9a Mon Sep 17 00:00:00 2001 From: Rajendra Kumar Date: Mon, 20 Mar 2017 17:41:07 +0100 Subject: [PATCH] update normalization thershold. update browser list widget --- .../genomicsDataHandler/hdf5handler.rst | 1 + docs/apidoc/summary.rst | 1 + gcMapExplorer/clui/normIC.py | 26 +- gcMapExplorer/clui/normKR.py | 28 +- gcMapExplorer/clui/normMCFS.py | 25 +- gcMapExplorer/gui/UIs/importer.ui | 416 +++++++++++++++++- gcMapExplorer/gui/UIs/mainWindow.ui | 5 +- gcMapExplorer/gui/UIs/normalizer.ui | 115 ++++- .../gui/UIs/other1DFormatFileConversion.ui | 7 + gcMapExplorer/gui/UIs/userColorMapDialog.ui | 7 + gcMapExplorer/gui/browser.py | 39 +- gcMapExplorer/gui/importer_ui.py | 152 ++++++- gcMapExplorer/gui/normalizer_ui.py | 26 +- gcMapExplorer/lib/gcmap.py | 18 +- gcMapExplorer/lib/genomicsDataHandler.py | 23 +- gcMapExplorer/lib/normalizer.py | 128 +++++- setup.py | 2 +- 17 files changed, 949 insertions(+), 70 deletions(-) diff --git a/docs/apidoc/genomicsDataHandler/hdf5handler.rst b/docs/apidoc/genomicsDataHandler/hdf5handler.rst index 5158894..16c223a 100644 --- a/docs/apidoc/genomicsDataHandler/hdf5handler.rst +++ b/docs/apidoc/genomicsDataHandler/hdf5handler.rst @@ -6,6 +6,7 @@ .. autosummary:: HDF5Handler + HDF5Handler.setTitle HDF5Handler.getChromList HDF5Handler.getResolutionList HDF5Handler.getDataNameList diff --git a/docs/apidoc/summary.rst b/docs/apidoc/summary.rst index 480676f..6276985 100644 --- a/docs/apidoc/summary.rst +++ b/docs/apidoc/summary.rst @@ -105,6 +105,7 @@ genomicsDataHandler module .. 
autosummary:: genomicsDataHandler.HDF5Handler + genomicsDataHandler.HDF5Handler.setTitle genomicsDataHandler.HDF5Handler.getChromList genomicsDataHandler.HDF5Handler.getResolutionList genomicsDataHandler.HDF5Handler.getDataNameList diff --git a/gcMapExplorer/clui/normIC.py b/gcMapExplorer/clui/normIC.py index 669de01..3efd947 100644 --- a/gcMapExplorer/clui/normIC.py +++ b/gcMapExplorer/clui/normIC.py @@ -77,6 +77,20 @@ """ +vminHelp = \ +""" Minimum thershold value for normalization. +If contact frequency is less than or equal to this thershold value, +this value is discarded during normalization. + +""" + +vmaxHelp = \ +""" Maximum thershold value for normalization. +If contact frequency is greater than or equal to this thershold value, +this value is discarded during normalization. + +""" + tolHelp = \ """ Tolerance for matrix balancing. Smaller tolreance increases accuracy in sums of rows and columns. @@ -161,13 +175,14 @@ def main(): if args.outputFileFormat == 'ccmap' and args.inputFileFormat == 'ccmap': gmlib.normalizer.normalizeCCMapByIC(args.inputFile, tol=args.tol, iteration=args.iteration, - outFile=args.outputFile, + outFile=args.outputFile, vmin=args.vmin, vmax=args.vmax, percentile_thershold_no_data=args.percentile_thershold_no_data, thershold_data_occup=args.thershold_data_occup, workDir=args.workDir) if args.outputFileFormat == 'gcmap' and args.inputFileFormat == 'ccmap': norm_ccmap = gmlib.normalizer.normalizeCCMapByIC(args.inputFile, tol=args.tol, iteration=args.iteration, + vmin=args.vmin, vmax=args.vmax, percentile_thershold_no_data=args.percentile_thershold_no_data, thershold_data_occup=args.thershold_data_occup, workDir=args.workDir) @@ -177,6 +192,7 @@ def main(): if args.outputFileFormat == 'gcmap' and args.inputFileFormat == 'gcmap': gmlib.normalizer.normalizeGCMapByIC(args.inputFile, args.outputFile, tol=args.tol, iteration=args.iteration, + vmin=args.vmin, vmax=args.vmax, percentile_thershold_no_data=args.percentile_thershold_no_data, 
thershold_data_occup=args.thershold_data_occup, workDir=args.workDir) @@ -212,6 +228,14 @@ def parseArguments(): type = float, default=1e-4, help=tolHelp) + parser.add_argument('-vmax', '--maximum-value', action='store', + dest='vmax', type=float, metavar=None, + help=vmaxHelp) + + parser.add_argument('-vmin', '--minimum-value', action='store', + dest='vmin', metavar=None, type=float, + help=vminHelp) + parser.add_argument('-c', '--iteration', action='store', dest='iteration', metavar=500, type = int, default=500, diff --git a/gcMapExplorer/clui/normKR.py b/gcMapExplorer/clui/normKR.py index 9e5e010..39d6e5f 100644 --- a/gcMapExplorer/clui/normKR.py +++ b/gcMapExplorer/clui/normKR.py @@ -87,6 +87,20 @@ """ +vminHelp = \ +""" Minimum thershold value for normalization. +If contact frequency is less than or equal to this thershold value, +this value is discarded during normalization. + +""" + +vmaxHelp = \ +""" Maximum thershold value for normalization. +If contact frequency is greater than or equal to this thershold value, +this value is discarded during normalization. + +""" + mapSizeCeilingForMemoryHelp = \ """ Maximum size of contact map allowed for calculation using RAM. 
If map size or shape is larger than this value, normalization will be @@ -177,13 +191,15 @@ def main(): showErrorAndExit(parser, msg) if args.outputFileFormat == 'ccmap' and args.inputFileFormat == 'ccmap': - gmlib.normalizer.normalizeCCMapByKR(args.inputFile, memory=args.memory, tol=args.tol, outFile=args.outputFile, + gmlib.normalizer.normalizeCCMapByKR(args.inputFile, memory=args.memory, tol=args.tol, + vmin=args.vmin, vmax=args.vmax, outFile=args.outputFile, percentile_thershold_no_data=args.percentile_thershold_no_data, thershold_data_occup=args.thershold_data_occup, workDir=args.workDir) if args.outputFileFormat == 'gcmap' and args.inputFileFormat == 'ccmap': norm_ccmap = gmlib.normalizer.normalizeCCMapByKR(args.inputFile, memory=args.memory, tol=args.tol, + vmin=args.vmin, vmax=args.vmax, percentile_thershold_no_data=args.percentile_thershold_no_data, thershold_data_occup=args.thershold_data_occup, workDir=args.workDir) @@ -193,7 +209,7 @@ def main(): if args.outputFileFormat == 'gcmap' and args.inputFileFormat == 'gcmap': gmlib.normalizer.normalizeGCMapByKR(args.inputFile, args.outputFile, mapSizeCeilingForMemory=args.mapSizeCeilingForMemory, - tol=args.tol, + tol=args.tol, vmin=args.vmin, vmax=args.vmax, percentile_thershold_no_data=args.percentile_thershold_no_data, thershold_data_occup=args.thershold_data_occup, workDir=args.workDir) @@ -234,6 +250,14 @@ def parseArguments(): choices=['RAM', 'HDD'], default='RAM', help=memoryHelp) + parser.add_argument('-vmax', '--maximum-value', action='store', + dest='vmax', type=float, metavar=None, + help=vmaxHelp) + + parser.add_argument('-vmin', '--minimum-value', action='store', + dest='vmin', metavar=None, type=float, + help=vminHelp) + parser.add_argument('-mscm', '--map-size-ceiling-for-memory', action='store', dest='mapSizeCeilingForMemory', metavar='20000', type = int, diff --git a/gcMapExplorer/clui/normMCFS.py b/gcMapExplorer/clui/normMCFS.py index 06a50e0..d5afca7 100644 --- 
a/gcMapExplorer/clui/normMCFS.py +++ b/gcMapExplorer/clui/normMCFS.py @@ -79,6 +79,20 @@ """ +vminHelp = \ +""" Minimum thershold value for normalization. +If contact frequency is less than or equal to this thershold value, +this value is discarded during normalization. + +""" + +vmaxHelp = \ +""" Maximum thershold value for normalization. +If contact frequency is greater than or equal to this thershold value, +this value is discarded during normalization. + +""" + percentile_thershold_no_data_help = \ """ It can be used to filter the map, where rows/columns with largest numbers of missing data can be discarded. Its value should be between 1 and 100. @@ -142,12 +156,14 @@ def main(): if args.outputFileFormat == 'ccmap' and args.inputFileFormat == 'ccmap': gmlib.normalizer.normalizeCCMapByMCFS(args.inputFile, stats=args.stats, outFile=args.outputFile, + vmin=args.vmin, vmax=args.vmax, percentile_thershold_no_data=args.percentile_thershold_no_data, thershold_data_occup=args.thershold_data_occup, workDir=args.workDir) if args.outputFileFormat == 'gcmap' and args.inputFileFormat == 'ccmap': norm_ccmap = gmlib.normalizer.normalizeCCMapByMCFS(args.inputFile, stats=args.stats, + vmin=args.vmin, vmax=args.vmax, percentile_thershold_no_data=args.percentile_thershold_no_data, thershold_data_occup=args.thershold_data_occup, workDir=args.workDir) @@ -156,7 +172,7 @@ def main(): if args.outputFileFormat == 'gcmap' and args.inputFileFormat == 'gcmap': gmlib.normalizer.normalizeGCMapByMCFS(args.inputFile, args.outputFile, - stats=args.stats, + stats=args.stats, vmin=args.vmin, vmax=args.vmax, percentile_thershold_no_data=args.percentile_thershold_no_data, thershold_data_occup=args.thershold_data_occup, compression=args.compression, @@ -190,6 +206,13 @@ def parseArguments(): dest='stats', metavar='median', choices=['median', 'mean'], default='median', help=statsHelp) + parser.add_argument('-vmax', '--maximum-value', action='store', + dest='vmax', type=float, metavar=None, + 
help=vmaxHelp) + + parser.add_argument('-vmin', '--minimum-value', action='store', + dest='vmin', metavar=None, type=float, + help=vminHelp) parser.add_argument('-ptnd', '--percentile-thershold-no-data', action='store', dest='percentile_thershold_no_data', metavar=99, diff --git a/gcMapExplorer/gui/UIs/importer.ui b/gcMapExplorer/gui/UIs/importer.ui index 9feaf4a..0f2d177 100644 --- a/gcMapExplorer/gui/UIs/importer.ui +++ b/gcMapExplorer/gui/UIs/importer.ui @@ -6,7 +6,7 @@ 0 0 - 900 + 699 983 @@ -90,7 +90,28 @@ cbin1 cbin2 expected_count observed_count . . ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Both BIN and CONTACT files are neccessary for the conversion. +Both BIN and CONTACT files are neccessary for the conversion. + + +4) Paired sparse matrix Coordinate (COO) format: This format is very similar to COO format with additional information of chromosome. Therefore, maps for all chromosome could be contained in a single +file. This type of format appeared with following publication: + http://dx.doi.org/10.1016/j.cell.2015.10.026 + GEO: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE72512 + +Following file format can be read as a text file, where first and second column is location on chromosome and third column is the value: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + chr4 60000 75000 chr4 60000 75000 0.1163470887070292 + chr4 60000 75000 chr4 105000 120000 0.01292745430078102 + chr4 60000 75000 chr4 435000 450000 0.01292745430078102 + chr4 75000 90000 chr4 75000 90000 0.05170981720312409 + chr4 75000 90000 chr4 345000 360000 0.01292745430078102 + chr4 90000 105000 chr4 90000 105000 0.01292745430078102 + . + . + . + . +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + @@ -107,6 +128,11 @@ Both BIN and CONTACT files are neccessary for the conversion. Bins-Contact List + + + Pair COO Matrix + + @@ -124,8 +150,18 @@ Both BIN and CONTACT files are neccessary for the conversion. + + + 9 + 75 + false + true + true + true + + - + ?? 
false @@ -157,7 +193,7 @@ Both BIN and CONTACT files are neccessary for the conversion. 0 0 - 866 + 665 553 @@ -604,7 +640,7 @@ Presently, only "tar.gz" and "zip" compressed files are supp Output gcmap file. - <html><head/><body><p>Output gcmap file.</p></body></html> + Name of output gcmap file. All maps will be saved in this file. @@ -993,7 +1029,7 @@ Presently, only "tar.gz" and "zip" compressed files are supp To save all maps in a gcmap file - To save all maps in a gcmap file + Name of output gcmap file. All maps will be saved in this file. @@ -1429,7 +1465,7 @@ cbin1 cbin2 expected_count observed_count To save all maps in a gcmap file - To save all maps in a gcmap file + Name of output gcmap file. All maps will be saved in this file. @@ -1654,6 +1690,372 @@ cbin1 cbin2 expected_count observed_count + + + 4 + + + + + + + + Input File: + + + + + + + Input BIN file + + + Name of input file. + +Piared COO matrix format is as follows: + + chr4 60000 75000 chr4 60000 75000 0.1163470887070292 + chr4 60000 75000 chr4 105000 120000 0.01292745430078102 + chr4 60000 75000 chr4 435000 450000 0.01292745430078102 + chr4 75000 90000 chr4 75000 90000 0.05170981720312409 + chr4 75000 90000 chr4 345000 360000 0.01292745430078102 + chr4 90000 105000 chr4 90000 105000 0.01292745430078102 + . + . + . + . + . + . + + + + + + + + + + + + + + + + + Save as ccmap file + + + Save as ccmap file + + + Save as CCMAP Files: + + + true + + + + + + Output File Suffix: + + + + + + + + 0 + 0 + + + + + 100 + 0 + + + + + 100 + 16777215 + + + + Suffix of ccmap files + + + <html><head/><body><p>Use this to convert all contact maps to ccmap format files. 
Provide suffix of ccmap file names with this option and it will enable the conversion.</p><p>Ouput ccmap file name is generated outmatically as follows; if xlabel is not equal to ylabel: &quot;&lt;xlabel&gt;_&lt;ylabel&gt;_&lt;suffix&gt;.ccmap&quot; else: &quot;&lt;xlabel&gt;_&lt;suffix&gt;.ccmap&quot;</p><p>Note that &quot;Ouput Directory&quot; is also required because all ccmaps will be saved in this directory.</p></body></html> + + + + + + + Qt::Vertical + + + + + + + Output Directory: + + + + + + + Directory to save all ccmap files. + + + Directory to save all ccmap files. This options is neccessary. + + + + + + + + + + + + + + + + + Save as GCMAP File: + + + true + + + + + + Output File: + + + + + + + To save all maps in a gcmap file + + + Name of output gcmap file. All maps will be saved in this file. + + + + + + + + + + + + + + Qt::Vertical + + + + + + + Compression: + + + + + + + + 0 + 0 + + + + + 60 + 16777215 + + + + Data compression method in gcmap file.Data compression method in gcmap file. + + + <html><head/><body><p>Data compression method in gcmap file.</p><p>LZF : This method is very fast, and allow the rapid contact map reading. However, the size reduction is moderate in comparison with GZIP compression method. </p><p>GZIP: Slow but high compression ratio. use this method, if file has to read from other external software.</p></body></html> + + + + LZF + + + + + GZIP + + + + + + + + Qt::Vertical + + + + + + + Downsample by: + + + + + + + Downsampling method to coarsen the resolution in gcmap file. + + + <html><head/><body><p>Downsampling method to coarsen the resolution in gcmap file. Three implemented methods are</p><p>1) Sum : sum of values,</p><p>2) Average : Average of values and</p><p>3) Maximum : Maximum of values.</p><p>This option generates all coarser maps where resolutions will be coarsened by a factor of two, consequetively. 
e.g.: In case of 10 kb input resolution, downsampled maps of &quot;20kb&quot;, &quot;40kb&quot;, &quot;80kb&quot;, &quot;160kb&quot;, &quot;320kb&quot; etc. will be generated until, map size is less than 500.</p></body></html> + + + + Sum + + + + + Maximum + + + + + Average + + + + + + + + + + + + + Scratch Directory: + + + + + + + Directory where temporary files will be stored. + + + Directory where temporary files will be stored. + + + + + + + + + + + + + + + + Qt::Vertical + + + + 20 + 40 + + + + + + + + + + Qt::Horizontal + + + + 40 + 20 + + + + + + + + + 75 + true + + + + Stop + + + + + + + Qt::Horizontal + + + + 40 + 20 + + + + + + + + + 75 + true + + + + Run + + + + + + + Qt::Horizontal + + + + 40 + 20 + + + + + + + + diff --git a/gcMapExplorer/gui/UIs/mainWindow.ui b/gcMapExplorer/gui/UIs/mainWindow.ui index 8526b9e..d13aa2b 100644 --- a/gcMapExplorer/gui/UIs/mainWindow.ui +++ b/gcMapExplorer/gui/UIs/mainWindow.ui @@ -138,8 +138,11 @@ QFrame::Raised + + Qt::ScrollBarAsNeeded + - true + false diff --git a/gcMapExplorer/gui/UIs/normalizer.ui b/gcMapExplorer/gui/UIs/normalizer.ui index c01bbba..76acf53 100644 --- a/gcMapExplorer/gui/UIs/normalizer.ui +++ b/gcMapExplorer/gui/UIs/normalizer.ui @@ -84,7 +84,7 @@ 0 0 604 - 498 + 504 @@ -137,8 +137,15 @@ + + + 75 + true + true + + - + ?? @@ -678,22 +685,90 @@ Options - - + + 4 + + 2 + - 4 + 2 - 4 + 2 - 4 + 2 + + + + + + Minimum Thershold Value: + + + + + + + + 100 + 16777215 + + + + Minimum thershold value for normalization. + + + Minimum thershold value for normalization. If contact frequency is less than or equal to this thershold value, this value is discarded during normalization. + + + + + + + + Qt::Horizontal + + + + 40 + 20 + + + + + + + + Maximum Thershold Value: + + + + + + + + 100 + 16777215 + + + + Maximum thershold value for normalization. + + + Maximum thershold value for normalization. If contact frequency is greater than or equal to this thershold value, +this value is discarded during normalization. 
+ + + + + - + @@ -703,7 +778,7 @@ - 100 + 50 16777215 @@ -715,7 +790,7 @@ - + @@ -725,7 +800,7 @@ - 100 + 50 16777215 @@ -737,35 +812,35 @@ - + Percentile Thershold for Missing Data: - + Fraction Thershold for Data Occupancy: - + Scratch Directory - + Directory where temporary files will be stored. - + @@ -806,16 +881,16 @@ 4 - 4 + 2 - 4 + 2 - 4 + 2 - 4 + 2 diff --git a/gcMapExplorer/gui/UIs/other1DFormatFileConversion.ui b/gcMapExplorer/gui/UIs/other1DFormatFileConversion.ui index eb5f783..bc763e3 100644 --- a/gcMapExplorer/gui/UIs/other1DFormatFileConversion.ui +++ b/gcMapExplorer/gui/UIs/other1DFormatFileConversion.ui @@ -634,6 +634,13 @@ + + + 75 + true + true + + What is this?? diff --git a/gcMapExplorer/gui/UIs/userColorMapDialog.ui b/gcMapExplorer/gui/UIs/userColorMapDialog.ui index 5777707..8f121f4 100644 --- a/gcMapExplorer/gui/UIs/userColorMapDialog.ui +++ b/gcMapExplorer/gui/UIs/userColorMapDialog.ui @@ -63,6 +63,13 @@ + + + 75 + true + true + + What is this?? diff --git a/gcMapExplorer/gui/browser.py b/gcMapExplorer/gui/browser.py index a7638da..657243a 100644 --- a/gcMapExplorer/gui/browser.py +++ b/gcMapExplorer/gui/browser.py @@ -147,8 +147,8 @@ def loadDataToPlot(self, gpa): # if a Dataset is selected by user, then process further if gpa.dataArray is not None: # Generate treeWidget item and add it as a child to ccmap axis treeWidget - gpa.set_tree_widget_item() - self.hiCmapAxes[idx].treeWidgetItem.addChild(gpa.treeWidgetItem) + gpa.set_tree_widget_item(self.axisTreeWidget) + # Append GenomicDataPlotAxis to list self.hiCmapAxes[idx].genmoicPlotAxes.append(gpa) @@ -1092,7 +1092,7 @@ def set_horizontal_space_to_figure(self, operator): self.canvas.draw() self.get_horizontal_vertical_space_from_figure() - def add_new_ccmap_axes(self): + def add_new_ccmap_axes(self, pathToMapFile): ''' Add new ccmap axes This function add new hic-map on the canvas. To add new plots, previous plots are at first removed. 
@@ -1112,10 +1112,10 @@ def add_new_ccmap_axes(self): # Initialize and append new ccmap axis object into hic-map list self.hiCmapAxes.append(CCMAPAXIS(0, ax)) - self.hiCmapAxes[0].title = 'Map 1' + self.hiCmapAxes[0].title = os.path.splitext(pathToMapFile)[0] # Add this ccmap axis to the tree widget - self.hiCmapAxes[0].set_tree_widget_item() + self.hiCmapAxes[0].set_tree_widget_item(self.axisTreeWidget) self.axisTreeWidget.addTopLevelItem(self.hiCmapAxes[0].treeWidgetItem) self.axisTreeWidget.setCurrentItem(self.hiCmapAxes[0].treeWidgetItem) @@ -1176,13 +1176,13 @@ def add_new_ccmap_axes(self): self.hiCmapAxes.append(CCMAPAXIS(i, ax)) # generate title - self.hiCmapAxes[i].title = 'Map {0}' .format(i+1) + self.hiCmapAxes[i].title = os.path.splitext(pathToMapFile)[0] # set index of active ccmap self.ActiveHiCmapAxis = i # Add new map to tree widget - self.hiCmapAxes[i].set_tree_widget_item() + self.hiCmapAxes[i].set_tree_widget_item(self.axisTreeWidget) self.axisTreeWidget.addTopLevelItem(self.hiCmapAxes[i].treeWidgetItem) self.axisTreeWidget.setCurrentItem(self.hiCmapAxes[i].treeWidgetItem) @@ -1389,7 +1389,7 @@ def open_map_pyobj(self): return # Add new CCMAPAXIS instance and respective matplotlib axes instance - self.add_new_ccmap_axes() + self.add_new_ccmap_axes(path[0]) # If first time, enable all options if self.hiCmapAxes is not None: @@ -3022,14 +3022,22 @@ def changeDataByName(self, browser, name, change=False): return success - def set_tree_widget_item(self): + def set_tree_widget_item(self, treeWidget): """Initialize tree widget item """ + title = None if self.hdf5Hand is not None: - self.treeWidgetItem = QTreeWidgetItem([self.hdf5Hand.title], 0) + title = self.hdf5Hand.title if self.txtFileHand is not None: - self.treeWidgetItem = QTreeWidgetItem([self.txtFileHand.title], 0) + title = self.txtFileHand.title + + if title is not None: + label = QLabel(title) + label.setWordWrap(True) + self.treeWidgetItem = QTreeWidgetItem() + 
self.hiCmapAxis.treeWidgetItem.addChild(self.treeWidgetItem) + treeWidget.setItemWidget(self.treeWidgetItem, 0, label) def updatePlot(self, ax=None): """Update plots @@ -3465,8 +3473,13 @@ def set_ccmap(self, path, fileType, mapName=None, resolution=None, filesOpened=N # Make list of all contact maps self.tryEnableInterchangeCMapName() - def set_tree_widget_item(self): - self.treeWidgetItem = QTreeWidgetItem([self.title], 0) + def set_tree_widget_item(self, treeWidget): + label = QLabel(self.title) + label.setWordWrap(True) + self.treeWidgetItem = QTreeWidgetItem() + treeWidget.addTopLevelItem(self.treeWidgetItem) + treeWidget.setItemWidget(self.treeWidgetItem, 0, label) + #self.treeWidgetItem = QTreeWidgetItem([self.title], 0) def update_axes_props(self): diff --git a/gcMapExplorer/gui/importer_ui.py b/gcMapExplorer/gui/importer_ui.py index ce1bc67..2bde980 100644 --- a/gcMapExplorer/gui/importer_ui.py +++ b/gcMapExplorer/gui/importer_ui.py @@ -700,10 +700,152 @@ def binContactConstructCommand(self, opts): self.binContactCommand = command +class pairCooMatFormatTabWidgetHelper: + """ Helper class containing all member-functions related to paired-COO matrix file tab-widget + """ + + def initPairCooMatFormatTabWidget(self): + self.pairCooMatCommand = None + + self.pairCooMatInputFileBrowsButton.setIcon( self.style().standardIcon(QStyle.SP_DirOpenIcon) ) + self.pairCooMatOutDirBrowseButton.setIcon( self.style().standardIcon(QStyle.SP_DirOpenIcon) ) + self.pairCooMatScratchDirBrowsButton.setIcon( self.style().standardIcon(QStyle.SP_DirOpenIcon) ) + self.pairCooMatGCMapOutSelectButton.setIcon( self.style().standardIcon(QStyle.SP_DirOpenIcon) ) + + self.pairCooMatTabRunButton.setIcon( self.style().standardIcon(QStyle.SP_MediaPlay) ) + self.pairCooMatTabStopButton.setIcon( self.style().standardIcon(QStyle.SP_MediaStop) ) + + self.connectPairCooMatTabWidgets() + + def connectPairCooMatTabWidgets(self): + self.pairCooMatInputFileBrowsButton.clicked.connect( 
self.pairCooMatBrowseInputFile ) + self.pairCooMatScratchDirBrowsButton.clicked.connect( self.pairCooMatBrowseScratchDir ) + self.pairCooMatOutDirBrowseButton.clicked.connect( self.pairCooMatBrowseOutputDir ) + self.pairCooMatGCMapOutSelectButton.clicked.connect( self.pairCooMatOpenGCMapFile ) + + self.pairCooMatTabRunButton.clicked.connect( self.runPairCooMatCommand ) + self.pairCooMatTabStopButton.clicked.connect( self.terminateProcessing ) + + self.pairCooMatInputFileLineEdit.editingFinished.connect( lambda: checkFileExist(self.pairCooMatInputFileLineEdit, self) ) + self.pairCooMatScratchDirLineEdit.editingFinished.connect( lambda: checkDirExist(self.pairCooMatScratchDirLineEdit, self) ) + self.pairCooMatOutDIrLineEdit.editingFinished.connect( lambda: checkDirExist(self.pairCooMatOutDIrLineEdit, self) ) + + + def pairCooMatBrowseInputFile(self): + """To get compressed file with full path + """ + # A dialog box will be displayed to select a text file and path will be stored in the cell + file_choices = " Text file (*.txt *.dat);; All file (*.*)" + path = QFileDialog.getOpenFileName(self, 'Open File', '', file_choices) + if path[0]: + self.pairCooMatInputFileLineEdit.setText(path[0]) + + def pairCooMatBrowseOutputDir(self): + """Browse and choose output directory + """ + path = QFileDialog.getExistingDirectory(self, 'Select Output Directory') + if path: + self.pairCooMatOutDIrLineEdit.setText(path) + + def pairCooMatBrowseScratchDir(self): + """Browse and choose scratch directory + """ + path = QFileDialog.getExistingDirectory(self, 'Select Scratch Directory') + if path: + self.pairCooMatScratchDirLineEdit.setText(path) + + def pairCooMatOpenGCMapFile(self): + """To open gcmap file with full path + """ + # A dialog box will be displayed to select a text file and path will be stored in the cell + file_choices = " gcmap file (*.gcmap);;All files(*.*)" + path = QFileDialog.getSaveFileName(self, 'Select or Create File', '', file_choices, 
options=QFileDialog.DontConfirmOverwrite) + if path[0]: + self.pairCooMatGCMapOutLineEdit.setText(path[0]) + + def readAndConstructPairCooMatCommand(self): + """Read and construct the command line + """ + self.pairCooMatCommand = None + options = dict() + + inputFile = None + if self.pairCooMatInputFileLineEdit.text(): + inputFile = str( self.pairCooMatInputFileLineEdit.text() ) + else: + self.pairCooMatInputFileLineEdit.setFocus() + showWarningMessageBox("No input file given !!!", self) + return False + options['-i'] = inputFile + + options['-wd'] = '"{0}"'.format(self.pairCooMatScratchDirLineEdit.text()) + + if not self.pairCooMatCCMapGroupBox.isChecked() \ + and not self.pairCooMatGCMapGroupBox.isChecked(): + showWarningMessageBox("No ccmap or gcmap output !!!", self) + return False + + ccmapSuffix = None + if self.pairCooMatCCMapGroupBox.isChecked(): + ccmapSuffix = str( self.pairCooMatOutSuffixLineEdit.text() ) + if not ccmapSuffix: + msg = "No suffix provided for ccmap files. \n" \ + + "Please provide a suffix for proper name." + showWarningMessageBox(msg, self) + self.pairCooMatOutSuffixLineEdit.setFocus() + return False + + outDir = str( self.pairCooMatOutDIrLineEdit.text() ) + if not outDir: + msg = "No Output Directory provided for ccmap files \n" \ + + "Please select a directory to save ccmap files." + showWarningMessageBox(msg, self) + self.pairCooMatOutDIrLineEdit.setFocus() + return False + + options['-ccm'] = ccmapSuffix + options['-od'] = '"{0}"'.format(outDir) + + if self.pairCooMatGCMapGroupBox.isChecked(): + fileGCMap = str( self.pairCooMatGCMapOutLineEdit.text() ) + if not fileGCMap: + msg = "No Output gcmap file is created or selected \n" \ + + "Please select or create a gcmap file." 
+ showWarningMessageBox(msg, self) + self.pairCooMatGCMapOutLineEdit.setFocus() + return False + + options['-gcm'] = '"{0}"'.format(fileGCMap) + options['-cmeth'] = str( self.pairCooMatGCMapCompressCBox.currentText() ).lower() + options['-dmeth'] = str( self.pairCooMatGCMapDownsampleCBox.currentText() ).lower() + + self.pairCooMatConstructCommand(options) + + def pairCooMatConstructCommand(self, opts): + """Construct the command line + """ + command = ' pairCoo2cmap ' + command += ' -i ' + opts['-i'] + + if '-ccm' in opts: + command += ' -ccm ' + opts['-ccm'] + command += ' -od ' + opts['-od'] + + if '-gcm' in opts: + command += ' -gcm ' + opts['-gcm'] + command += ' -cmeth ' + opts['-cmeth'] + command += ' -dmeth ' + opts['-dmeth'] + + command += ' -wd ' + opts['-wd'] + + self.pairCooMatCommand = command + # Main Window Of Importer pathToThisUI = os.path.join(PathToUIs, 'importer.ui') Ui_ImporterWindow, ImporterWindowBase = loadUiType(pathToThisUI) -class ImporterWindow(ImporterWindowBase, Ui_ImporterWindow, cooMatFormatTabWidgetbHelper, homerFormatTabWidgetHelper, binContactFormatTabWidgetHelper): +class ImporterWindow(ImporterWindowBase, Ui_ImporterWindow, cooMatFormatTabWidgetbHelper, + homerFormatTabWidgetHelper, binContactFormatTabWidgetHelper, + pairCooMatFormatTabWidgetHelper): def __init__(self): super(ImporterWindow, self).__init__() self.setupUi(self) @@ -723,6 +865,7 @@ def __init__(self): self.initCooMatFormatTabWidget() self.initHomerFormatTabWidget() self.initBinContactFormatTabWidget() + self.initPairCooMatFormatTabWidget() self.setDefaultScratchDirs() self.connectMainButtons() @@ -751,7 +894,6 @@ def closeEvent(self, event): def connectMainButtons(self): self.inputSelectorQCBox.currentIndexChanged.connect( self.InputsTabWidget.setCurrentIndex ) - self.whatsThisButton.setIcon( self.style().standardIcon(QStyle.SP_TitleBarContextHelpButton) ) self.whatsThisButton.clicked.connect( QWhatsThis.enterWhatsThisMode ) 
self.logOutputClearButton.clicked.connect( self.logOutputPlainTextEdit.clear ) @@ -760,6 +902,7 @@ def setDefaultScratchDirs(self): self.cooMatScratchDirLineEdit.setText(defaultDir) self.homerScratchDirLineEdit.setText(defaultDir) self.binContactScratchDirLineEdit.setText(defaultDir) + self.pairCooMatScratchDirLineEdit.setText(defaultDir) def runCooMatrixCommand(self): self.readAndConstructCooMatCommand() @@ -776,6 +919,11 @@ def runBinContactCommand(self): if self.binContactCommand is None: return self.startProcess(self.binContactCommand, self.binContactTabRunButton) + def runPairCooMatCommand(self): + self.readAndConstructPairCooMatCommand() + if self.pairCooMatCommand is None: return + self.startProcess(self.pairCooMatCommand, self.pairCooMatTabRunButton) + def startProcess(self, command, button): self.process = QProcess(self) self.process.start('gcMapExplorer', shlex.split(command)) diff --git a/gcMapExplorer/gui/normalizer_ui.py b/gcMapExplorer/gui/normalizer_ui.py index 9589532..45f6aaf 100644 --- a/gcMapExplorer/gui/normalizer_ui.py +++ b/gcMapExplorer/gui/normalizer_ui.py @@ -64,7 +64,7 @@ def __init__(self): # Resize hight and reduce size of log text box self.resize(self.width(), 680) - self.splitter.setSizes([500, 180]) + self.splitter.setSizes([520, 160]) # Remove maximize window buttons self.setWindowFlags( (self.windowFlags() | Qt.CustomizeWindowHint) & ~Qt.WindowMaximizeButtonHint) @@ -105,7 +105,6 @@ def setDefaultScratchDirs(self): def connectButtons(self): self.methodCBox.currentIndexChanged.connect( self.specOptsTabWidget.setCurrentIndex ) - self.whatsThisButton.setIcon( self.style().standardIcon(QStyle.SP_TitleBarContextHelpButton) ) self.whatsThisButton.clicked.connect( QWhatsThis.enterWhatsThisMode ) self.inputFileButton.setIcon( self.style().standardIcon(QStyle.SP_DirOpenIcon) ) @@ -141,6 +140,10 @@ def connectLineEdits(self): self.specOptsIcTolLineEdit.setValidator(QDoubleValidator()) self.specOptsIcIterLineEdit.setValidator(QIntValidator()) 
+ # vmin and vmax + self.vminLineEdit.setValidator(QDoubleValidator()) + self.vmaxLineEdit.setValidator(QDoubleValidator()) + # Check for input file - file exist; determine file format self.inputFileLineEdit.editingFinished.connect( lambda: guiHelpers.checkFileExist(self.inputFileLineEdit, self) ) self.inputFileLineEdit.editingFinished.connect( lambda: self.setFileFormat(self.inputFileLineEdit, self.inputFormatCBox) ) @@ -351,6 +354,12 @@ def readAndConstructCommand(self): cmdDict['-fi'] = inputFormat cmdDict['-fo'] = outputFormat + # Vmin and vmax + if self.vminLineEdit.text(): + cmdDict['-vmin'] = self.vminLineEdit.text() + if self.vmaxLineEdit.text(): + cmdDict['-vmax'] = self.vmaxLineEdit.text() + # Working or scratch directory workDir = self.genOptsScratchDirLineEdit.text() if not workDir: @@ -439,6 +448,11 @@ def constructKrCommand(self, cmdDict): command += ' -fo ' + cmdDict['-fo'] command += ' -t ' + str(cmdDict['-t']) command += ' -m ' + cmdDict['-m'] + + if '-vmin' in cmdDict: + command += ' -vmin ' + str(cmdDict['-vmin']) + if '-vmax' in cmdDict: + command += ' -vmax ' + str(cmdDict['-vmax']) if '-mscm' in cmdDict: command += ' -mscm ' + str(cmdDict['-mscm']) if '-cmeth' in cmdDict: @@ -461,6 +475,10 @@ def constructIcCommand(self, cmdDict): command += ' -fo ' + cmdDict['-fo'] command += ' -t ' + str(cmdDict['-t']) command += ' -c ' + str(cmdDict['-c']) + if '-vmin' in cmdDict: + command += ' -vmin ' + str(cmdDict['-vmin']) + if '-vmax' in cmdDict: + command += ' -vmax ' + str(cmdDict['-vmax']) if '-cmeth' in cmdDict: command += ' -cmeth ' + cmdDict['-cmeth'] command += ' -wd ' + cmdDict['-wd'] @@ -480,6 +498,10 @@ def constructMcfsCommand(self, cmdDict): command += ' -o ' + cmdDict['-o'] command += ' -fo ' + cmdDict['-fo'] command += ' -s ' + str(cmdDict['-s']) + if '-vmin' in cmdDict: + command += ' -vmin ' + str(cmdDict['-vmin']) + if '-vmax' in cmdDict: + command += ' -vmax ' + str(cmdDict['-vmax']) if '-cmeth' in cmdDict: command += ' -cmeth ' + 
cmdDict['-cmeth'] command += ' -wd ' + cmdDict['-wd'] diff --git a/gcMapExplorer/lib/gcmap.py b/gcMapExplorer/lib/gcmap.py index fdd90af..b9ebfc7 100644 --- a/gcMapExplorer/lib/gcmap.py +++ b/gcMapExplorer/lib/gcmap.py @@ -818,17 +818,19 @@ def addCCMap2GCMap(cmap, filename, compression='lzf', generateCoarse=True, coars else: logger.info(' Adding data to [{0}] for [{1} - {2}] ...'.format(filename, groupName, resolution)) - if compression == 'lzf': - newCmap = group.create_dataset(resolution, cmap.shape, dtype=cmap.dtype, data=cmap.matrix, chunks=True, compression="lzf", shuffle=True) - else: - newCmap = group.create_dataset(resolution, cmap.shape, dtype=cmap.dtype, data=cmap.matrix, chunks=True, compression="gzip", shuffle=True, compression_opts=4) + # Remove old map + if resolution in group: + group.pop(resolution) + + + # Add new map + newCmap = group.create_dataset(resolution, cmap.shape, dtype=cmap.dtype, data=cmap.matrix, chunks=True, compression=compression, shuffle=True) # Save all other attributes if cmap.bNoData is not None: - if compression == 'lzf': - group.create_dataset(resolution+'-bNoData', cmap.bNoData.shape, dtype=cmap.bNoData.dtype, data=cmap.bNoData, chunks=True, compression="lzf", shuffle=True) - else: - group.create_dataset(resolution+'-bNoData', cmap.bNoData.shape, dtype=cmap.bNoData.dtype, data=cmap.bNoData, chunks=True, compression="gzip", shuffle=True, compression_opts=4) + if resolution+'-bNoData' in group: + group.pop(resolution+'-bNoData') + group.create_dataset(resolution+'-bNoData', cmap.bNoData.shape, dtype=cmap.bNoData.dtype, data=cmap.bNoData, chunks=True, compression=compression, shuffle=True) # Get minimum value othar than zero if cmap.minvalue == 0: diff --git a/gcMapExplorer/lib/genomicsDataHandler.py b/gcMapExplorer/lib/genomicsDataHandler.py index 542ed31..5d47d9e 100644 --- a/gcMapExplorer/lib/genomicsDataHandler.py +++ b/gcMapExplorer/lib/genomicsDataHandler.py @@ -446,8 +446,8 @@ def open(self): # Fail-safe mechanism for 
title in case of either new file or already opened file if 'title' not in self.hdf5.attrs: if self.title is None: - self.hdf5.attrs['title'] = "Genome Data" - self.title = "Genome Data" + self.hdf5.attrs['title'] = os.path.splitext(os.path.basename(self.filename))[0] + self.title = os.path.splitext(os.path.basename(self.filename))[0] else: self.hdf5.attrs['title'] = self.title else: @@ -456,6 +456,25 @@ def open(self): self.logger.info(' Opened {0} ...' .format(self.filename)) + def setTitle(self, title): + """ Set title of the dataset + + It can be used to set or replace the title of the dataset. + If file is not yet opened, title will be stored to file when file + will be opened. + + Parameters + ---------- + title : str + The title of dataset. + + """ + self.title = title + if self.hdf5 is not None: + self.hdf5.attrs['title'] = title + else: + self.logger.info(" File is not opened!!! Title will be stored to file on file opening.") + def getChromList(self): """To get list of all chromosomes present in hdf5 file diff --git a/gcMapExplorer/lib/normalizer.py b/gcMapExplorer/lib/normalizer.py index b37f30c..9707def 100644 --- a/gcMapExplorer/lib/normalizer.py +++ b/gcMapExplorer/lib/normalizer.py @@ -253,7 +253,7 @@ def NormalizeKnightRuizOriginal(ccMapObj, tol=1e-12, x0=None, delta=0.1, Delta=3 return normCCMap -def normalizeCCMapByKR(ccMap, memory='RAM', tol=1e-12, outFile=None, percentile_thershold_no_data=None, thershold_data_occup=None, workDir=None): +def normalizeCCMapByKR(ccMap, memory='RAM', tol=1e-12, outFile=None, vmin=None, vmax=None, percentile_thershold_no_data=None, thershold_data_occup=None, workDir=None): """Normalize a ccmap using Knight-Ruiz matrix balancing method. .. note:: @@ -277,6 +277,12 @@ def normalizeCCMapByKR(ccMap, memory='RAM', tol=1e-12, outFile=None, percentile_ outFile : str Name of output ccmap file, to save directly the normalized map as a ccmap file.
In case of this option, ``None`` will return. + vmin : float + Minimum thershold value for normalization. If contact frequency is less than or equal to this thershold value, this value is discarded during normalization. + + vmax : float + Maximum thershold value for normalization. If contact frequency is greater than or equal to this thershold value, this value is discarded during normalization. + percentile_thershold_no_data : int It can be used to filter the map, where rows/columns with largest numbers of missing data can be discarded. ``percentile_thershold_no_data`` should be between 1 and 100. This options discard the rows and columns which are above this percentile. @@ -306,7 +312,26 @@ def normalizeCCMapByKR(ccMap, memory='RAM', tol=1e-12, outFile=None, percentile_ """ # Check whether input is a file or a obejct - ccMapObj, ccmapType = _checkCCMAP(ccMap, workDir=workDir) + ccMapObjOrig, ccmapType = _checkCCMAP(ccMap, workDir=workDir) + + # Make another copy here for maximum and minimum thershold value + if vmin is not None or vmax is not None: + ccMapObj = ccMapObjOrig.copy() + ccMapObj.make_editable() + + if ccmapType == 'File': + del ccMapObjOrig + + ccmapType = 'File' # This temporary file should be deleted when neccessary + if vmin is not None: + ccMapObj.matrix[ np.nonzero(ccMapObj.matrix <= vmin) ] = 0.0 + if vmax is not None: + ccMapObj.matrix[ np.nonzero(ccMapObj.matrix >= vmax) ] = 0.0 + ccMapObj.matrix.flush() + ccMapObj.make_unreadable() + + else: + ccMapObj = ccMapObjOrig normCCMap = ccMapObj.copy(fill=0.0) normCCMap.make_editable() @@ -397,7 +422,7 @@ def normalizeCCMapByKR(ccMap, memory='RAM', tol=1e-12, outFile=None, percentile_ logger.warning('Error in normalizing map!!!') return None -def normalizeGCMapByKR(gcMapInputFile, gcMapOutFile, mapSizeCeilingForMemory=20000, tol=1e-12, percentile_thershold_no_data=None, thershold_data_occup=None, compression='lzf', workDir=None, logHandler=None): +def normalizeGCMapByKR(gcMapInputFile, gcMapOutFile, 
mapSizeCeilingForMemory=20000, vmin=None, vmax=None, tol=1e-12, percentile_thershold_no_data=None, thershold_data_occup=None, compression='lzf', workDir=None, logHandler=None): """Normalize a gcmap using Knight-Ruiz matrix balancing method. @@ -413,6 +438,12 @@ def normalizeGCMapByKR(gcMapInputFile, gcMapOutFile, mapSizeCeilingForMemory=200 Maximum size of contact map allowed for calculation using RAM. If map size or shape is larger than this value, normalization will be performed using disk (HDD). + vmin : float + Minimum thershold value for normalization. If contact frequency is less than or equal to this thershold value, this value is discarded during normalization. + + vmax : float + Maximum thershold value for normalization. If contact frequency is greater than or equal to this thershold value, this value is discarded during normalization. + tol : float Tolerance for matrix balancing. Smaller tolreance increases accuracy in sums of rows and columns. @@ -459,6 +490,18 @@ def normalizeGCMapByKR(gcMapInputFile, gcMapOutFile, mapSizeCeilingForMemory=200 for mapName in mapList: ccMap = gmp.loadGCMapAsCCMap(gcMapInputFile, mapName=mapName, workDir=workDir) + # Because ccMap is already loaded here, directly edit matrix here + # No need to pass vmin and vmx during normalization as + # it is already taken care here + if vmin is not None or vmax is not None: + ccMap.make_editable() + if vmin is not None: + ccMap.matrix[ np.nonzero(ccMap.matrix <= vmin) ] = 0.0 + if vmax is not None: + ccMap.matrix[ np.nonzero(ccMap.matrix >= vmax) ] = 0.0 + ccMap.matrix.flush() + ccMap.make_unreadable() + try: if ccMap.shape[0] > mapSizeCeilingForMemory: @@ -482,7 +525,7 @@ def normalizeGCMapByKR(gcMapInputFile, gcMapOutFile, mapSizeCeilingForMemory=200 if 'ccMap' in locals(): del ccMap raise e -def normalizeCCMapByIC(ccMap, tol=1e-4, outFile=None, iteration=500, percentile_thershold_no_data=None, thershold_data_occup=None, workDir=None): +def normalizeCCMapByIC(ccMap, tol=1e-4, 
vmin=None, vmax=None, outFile=None, iteration=500, percentile_thershold_no_data=None, thershold_data_occup=None, workDir=None): """ Normalize a ccmap by Iterative correction method This method normalize the raw contact map by removing biases from experimental procedure. @@ -496,12 +539,18 @@ def normalizeCCMapByIC(ccMap, tol=1e-4, outFile=None, iteration=500, percentile_ tol : float Tolerance value. If variance of Delta-B is less than tolerance, stop the iterative process. - iteration : int - Number of iteration to stop the normalization. + vmin : float + Minimum thershold value for normalization. If contact frequency is less than or equal to this thershold value, this value is discarded during normalization. + + vmax : float + Maximum thershold value for normalization. If contact frequency is greater than or equal to this thershold value, this value is discarded during normalization. outFile : str Name of output ccmap file, to save directly the normalized map as a ccmap file. In case of this option, ``None`` will return. + iteration : int + Number of iteration to stop the normalization. + percentile_thershold_no_data : int It can be used to filter the map, where rows/columns with largest numbers of missing data can be discarded. ``percentile_thershold_no_data`` should be between 1 and 100. This options discard the rows and columns which are above this percentile. 
@@ -534,6 +583,15 @@ def normalizeCCMapByIC(ccMap, tol=1e-4, outFile=None, iteration=500, percentile_ tmap = ccMapObj.copy() + # In case if vmin and vmax is given + if vmin is not None or vmax is not None: + tmap.make_editable() + if vmin is not None: + tmap.matrix[ np.nonzero(tmap.matrix <= vmin) ] = 0.0 + if vmax is not None: + tmap.matrix[ np.nonzero(tmap.matrix >= vmax) ] = 0.0 + tmap.make_unreadable() + if ccMapObj.xlabel is not None: logger.info(' Iterative Correction is in process for {0} map...'.format(ccMapObj.xlabel)) else: @@ -611,7 +669,7 @@ def normalizeCCMapByIC(ccMap, tol=1e-4, outFile=None, iteration=500, percentile_ logger.warning('Error in Iterative Correction!!!') return None -def normalizeGCMapByIC(gcMapInputFile, gcMapOutFile, tol=1e-12, iteration=500, percentile_thershold_no_data=None, thershold_data_occup=None, compression='lzf', workDir=None, logHandler=None): +def normalizeGCMapByIC(gcMapInputFile, gcMapOutFile, vmin=None, vmax=None, tol=1e-12, iteration=500, percentile_thershold_no_data=None, thershold_data_occup=None, compression='lzf', workDir=None, logHandler=None): """Normalize a gcmap using Iterative Correction. This method normalize the raw contact map by removing biases from experimental procedure. @@ -625,6 +683,12 @@ def normalizeGCMapByIC(gcMapInputFile, gcMapOutFile, tol=1e-12, iteration=500, p gcMapOutFile : str Name of output gcmap file. + vmin : float + Minimum thershold value for normalization. If contact frequency is less than or equal to this thershold value, this value is discarded during normalization. + + vmax : float + Maximum thershold value for normalization. If contact frequency is greater than or equal to this thershold value, this value is discarded during normalization. + tol : float Tolerance value. If variance of Delta-B is less than tolerance, stop the iterative process. 
@@ -677,6 +741,7 @@ def normalizeGCMapByIC(gcMapInputFile, gcMapOutFile, tol=1e-12, iteration=500, p try: norm_ccmap = normalizeCCMapByIC(ccMap, tol=tol, iteration=iteration, + vmin=vmin, vmax=vmax, percentile_thershold_no_data=percentile_thershold_no_data, thershold_data_occup=thershold_data_occup, workDir=workDir) @@ -692,7 +757,7 @@ def normalizeGCMapByIC(gcMapInputFile, gcMapOutFile, tol=1e-12, iteration=500, p if 'ccMap' in locals(): del ccMap raise e -def normalizeCCMapByMCFS(ccMap, stats='median', outFile=None, percentile_thershold_no_data=None, thershold_data_occup=None, workDir=None): +def normalizeCCMapByMCFS(ccMap, stats='median', vmin=None, vmax=None, outFile=None, percentile_thershold_no_data=None, thershold_data_occup=None, workDir=None): """ Scale ccmap using Median Contact Frequency This method can be used to normalize contact map using Median contact values @@ -712,6 +777,12 @@ def normalizeCCMapByMCFS(ccMap, stats='median', outFile=None, percentile_thersho stats : str Statistics to be calculated along diagonals: It may be either "mean" or "median". By default, it is "median". + vmin : float + Minimum thershold value for normalization. If contact frequency is less than or equal to this thershold value, this value is discarded during normalization. + + vmax : float + Maximum thershold value for normalization. If contact frequency is greater than or equal to this thershold value, this value is discarded during normalization. + outFile : str Name of output ccmap file, to save directly the normalized map as a ccmap file. In case of this option, ``None`` will return. 
@@ -743,7 +814,26 @@ def normalizeCCMapByMCFS(ccMap, stats='median', outFile=None, percentile_thersho """ # Check whether input is a file or a obejct - ccMapObj, ccmapType = _checkCCMAP(ccMap, workDir=workDir) + ccMapObjOrig, ccmapType = _checkCCMAP(ccMap, workDir=workDir) + + # Make another copy here for maximum and minimum thershold value + if vmin is not None or vmax is not None: + ccMapObj = ccMapObjOrig.copy() + ccMapObj.make_editable() + + if ccmapType == 'File': + del ccMapObjOrig + + ccmapType = 'File' # This temporary file should be deleted when neccessary + if vmin is not None: + ccMapObj.matrix[ np.nonzero(ccMapObj.matrix <= vmin) ] = 0.0 + if vmax is not None: + ccMapObj.matrix[ np.nonzero(ccMapObj.matrix >= vmax) ] = 0.0 + ccMapObj.matrix.flush() + ccMapObj.make_unreadable() + + else: + ccMapObj = ccMapObjOrig normCCMap = ccMapObj.copy() @@ -792,7 +882,7 @@ def normalizeCCMapByMCFS(ccMap, stats='median', outFile=None, percentile_thersho logger.warning('Error in Median Contact Frequency Scaling!!!') return None -def normalizeGCMapByMCFS(gcMapInputFile, gcMapOutFile, stats='median', percentile_thershold_no_data=None, thershold_data_occup=None, compression='lzf', workDir=None, logHandler=None): +def normalizeGCMapByMCFS(gcMapInputFile, gcMapOutFile, stats='median', vmin=None, vmax=None, percentile_thershold_no_data=None, thershold_data_occup=None, compression='lzf', workDir=None, logHandler=None): """ Scale all maps in gcmap using Median Contact Frequency This method can be used to normalize contact map using Median contact values @@ -812,6 +902,12 @@ def normalizeGCMapByMCFS(gcMapInputFile, gcMapOutFile, stats='median', percentil stats : str Statistics to be calculated along diagonals: It may be either "mean" or "median". By default, it is "median". + vmin : float + Minimum thershold value for normalization. If contact frequency is less than or equal to this thershold value, this value is discarded during normalization. 
+ + vmax : float + Maximum thershold value for normalization. If contact frequency is greater than or equal to this thershold value, this value is discarded during normalization. + percentile_thershold_no_data : int It can be used to filter the map, where rows/columns with largest numbers of missing data can be discarded. ``percentile_thershold_no_data`` should be between 1 and 100. This options discard the rows and columns which are above this percentile. @@ -861,6 +957,18 @@ def normalizeGCMapByMCFS(gcMapInputFile, gcMapOutFile, stats='median', percentil while True: ccMap = gmp.loadGCMapAsCCMap(gcmap.hdf5, mapName=mapName, resolution=gcmap.resolution, workDir=workDir) + # Because ccMap is already loaded here, directly edit matrix here + # No need to pass vmin and vmx during normalization as + # it is already taken care here + if vmin is not None or vmax is not None: + ccMap.make_editable() + if vmin is not None: + ccMap.matrix[ np.nonzero(ccMap.matrix <= vmin) ] = 0.0 + if vmax is not None: + ccMap.matrix[ np.nonzero(ccMap.matrix >= vmax) ] = 0.0 + ccMap.matrix.flush() + ccMap.make_unreadable() + try: norm_ccmap = normalizeCCMapByMCFS(ccMap, stats=stats, percentile_thershold_no_data=percentile_thershold_no_data, diff --git a/setup.py b/setup.py index 54a9e06..4f168a1 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,7 @@ def read(fname): setup( name = 'gcMapExplorer', - version = '1.0.8', + version = '1.0.9', # Required packages install_requires = [ 'appdirs>=1.4', 'numpy>=1.6', 'scipy>=0.9', 'matplotlib>=1.1.0', 'dask>=0.7.3', 'toolz>=0.7.4', 'h5py>=2.2.1', 'Cython>=0.23.0' ],